mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-12 20:23:11 +00:00
Merge branch 'master' into mutagen-metadata
This commit is contained in:
commit
8b3127cf67
104
.github/workflows/build.yml
vendored
104
.github/workflows/build.yml
vendored
@ -107,10 +107,10 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
- uses: conda-incubator/setup-miniconda@v2
|
||||
- uses: conda-incubator/setup-miniconda@v3
|
||||
with:
|
||||
miniforge-variant: Mambaforge
|
||||
use-mamba: true
|
||||
@ -121,16 +121,14 @@ jobs:
|
||||
- name: Install Requirements
|
||||
run: |
|
||||
sudo apt -y install zip pandoc man sed
|
||||
reqs=$(mktemp)
|
||||
cat > "$reqs" << EOF
|
||||
cat > ./requirements.txt << EOF
|
||||
python=3.10.*
|
||||
pyinstaller
|
||||
cffi
|
||||
brotli-python
|
||||
secretstorage
|
||||
EOF
|
||||
sed -E '/^(brotli|secretstorage).*/d' requirements.txt >> "$reqs"
|
||||
mamba create -n build --file "$reqs"
|
||||
python devscripts/install_deps.py --print \
|
||||
--exclude brotli --exclude brotlicffi \
|
||||
--include secretstorage --include pyinstaller >> ./requirements.txt
|
||||
mamba create -n build --file ./requirements.txt
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
@ -144,9 +142,9 @@ jobs:
|
||||
run: |
|
||||
unset LD_LIBRARY_PATH # Harmful; set by setup-python
|
||||
conda activate build
|
||||
python pyinst.py --onedir
|
||||
python -m bundle.pyinstaller --onedir
|
||||
(cd ./dist/yt-dlp_linux && zip -r ../yt-dlp_linux.zip .)
|
||||
python pyinst.py
|
||||
python -m bundle.pyinstaller
|
||||
mv ./dist/yt-dlp_linux ./yt-dlp_linux
|
||||
mv ./dist/yt-dlp_linux.zip ./yt-dlp_linux.zip
|
||||
|
||||
@ -164,13 +162,15 @@ jobs:
|
||||
done
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-${{ github.job }}
|
||||
path: |
|
||||
yt-dlp
|
||||
yt-dlp.tar.gz
|
||||
yt-dlp_linux
|
||||
yt-dlp_linux.zip
|
||||
compression-level: 0
|
||||
|
||||
linux_arm:
|
||||
needs: process
|
||||
@ -201,17 +201,18 @@ jobs:
|
||||
dockerRunArgs: --volume "${PWD}/repo:/repo"
|
||||
install: | # Installing Python 3.10 from the Deadsnakes repo raises errors
|
||||
apt update
|
||||
apt -y install zlib1g-dev python3.8 python3.8-dev python3.8-distutils python3-pip
|
||||
apt -y install zlib1g-dev libffi-dev python3.8 python3.8-dev python3.8-distutils python3-pip
|
||||
python3.8 -m pip install -U pip setuptools wheel
|
||||
# Cannot access requirements.txt from the repo directory at this stage
|
||||
python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi secretstorage
|
||||
# Cannot access any files from the repo directory at this stage
|
||||
python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi secretstorage cffi
|
||||
|
||||
run: |
|
||||
cd repo
|
||||
python3.8 -m pip install -U Pyinstaller secretstorage -r requirements.txt # Cached version may be out of date
|
||||
python3.8 devscripts/install_deps.py -o --include build
|
||||
python3.8 devscripts/install_deps.py --include pyinstaller --include secretstorage # Cached version may be out of date
|
||||
python3.8 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
|
||||
python3.8 devscripts/make_lazy_extractors.py
|
||||
python3.8 pyinst.py
|
||||
python3.8 -m bundle.pyinstaller
|
||||
|
||||
if ${{ vars.UPDATE_TO_VERIFICATION && 'true' || 'false' }}; then
|
||||
arch="${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}"
|
||||
@ -224,10 +225,12 @@ jobs:
|
||||
fi
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-linux_${{ matrix.architecture }}
|
||||
path: | # run-on-arch-action designates armv7l as armv7
|
||||
repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}
|
||||
compression-level: 0
|
||||
|
||||
macos:
|
||||
needs: process
|
||||
@ -240,9 +243,10 @@ jobs:
|
||||
- name: Install Requirements
|
||||
run: |
|
||||
brew install coreutils
|
||||
python3 -m pip install -U --user pip setuptools wheel
|
||||
python3 devscripts/install_deps.py --user -o --include build
|
||||
python3 devscripts/install_deps.py --print --include pyinstaller > requirements.txt
|
||||
# We need to ignore wheels otherwise we break universal2 builds
|
||||
python3 -m pip install -U --user --no-binary :all: Pyinstaller -r requirements.txt
|
||||
python3 -m pip install -U --user --no-binary :all: -r requirements.txt
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
@ -250,9 +254,9 @@ jobs:
|
||||
python3 devscripts/make_lazy_extractors.py
|
||||
- name: Build
|
||||
run: |
|
||||
python3 pyinst.py --target-architecture universal2 --onedir
|
||||
python3 -m bundle.pyinstaller --target-architecture universal2 --onedir
|
||||
(cd ./dist/yt-dlp_macos && zip -r ../yt-dlp_macos.zip .)
|
||||
python3 pyinst.py --target-architecture universal2
|
||||
python3 -m bundle.pyinstaller --target-architecture universal2
|
||||
|
||||
- name: Verify --update-to
|
||||
if: vars.UPDATE_TO_VERIFICATION
|
||||
@ -265,11 +269,13 @@ jobs:
|
||||
[[ "$version" != "$downgraded_version" ]]
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-${{ github.job }}
|
||||
path: |
|
||||
dist/yt-dlp_macos
|
||||
dist/yt-dlp_macos.zip
|
||||
compression-level: 0
|
||||
|
||||
macos_legacy:
|
||||
needs: process
|
||||
@ -293,8 +299,8 @@ jobs:
|
||||
- name: Install Requirements
|
||||
run: |
|
||||
brew install coreutils
|
||||
python3 -m pip install -U --user pip setuptools wheel
|
||||
python3 -m pip install -U --user Pyinstaller -r requirements.txt
|
||||
python3 devscripts/install_deps.py --user -o --include build
|
||||
python3 devscripts/install_deps.py --user --include pyinstaller
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
@ -302,7 +308,7 @@ jobs:
|
||||
python3 devscripts/make_lazy_extractors.py
|
||||
- name: Build
|
||||
run: |
|
||||
python3 pyinst.py
|
||||
python3 -m bundle.pyinstaller
|
||||
mv dist/yt-dlp_macos dist/yt-dlp_macos_legacy
|
||||
|
||||
- name: Verify --update-to
|
||||
@ -316,10 +322,12 @@ jobs:
|
||||
[[ "$version" != "$downgraded_version" ]]
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-${{ github.job }}
|
||||
path: |
|
||||
dist/yt-dlp_macos_legacy
|
||||
compression-level: 0
|
||||
|
||||
windows:
|
||||
needs: process
|
||||
@ -328,13 +336,14 @@ jobs:
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with: # 3.8 is used for Win7 support
|
||||
python-version: "3.8"
|
||||
- name: Install Requirements
|
||||
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
|
||||
python -m pip install -U pip setuptools wheel py2exe
|
||||
pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl" -r requirements.txt
|
||||
python devscripts/install_deps.py -o --include build
|
||||
python devscripts/install_deps.py --include py2exe
|
||||
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl"
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
@ -342,10 +351,10 @@ jobs:
|
||||
python devscripts/make_lazy_extractors.py
|
||||
- name: Build
|
||||
run: |
|
||||
python setup.py py2exe
|
||||
python -m bundle.py2exe
|
||||
Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe
|
||||
python pyinst.py
|
||||
python pyinst.py --onedir
|
||||
python -m bundle.pyinstaller
|
||||
python -m bundle.pyinstaller --onedir
|
||||
Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip
|
||||
|
||||
- name: Verify --update-to
|
||||
@ -362,12 +371,14 @@ jobs:
|
||||
}
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-${{ github.job }}
|
||||
path: |
|
||||
dist/yt-dlp.exe
|
||||
dist/yt-dlp_min.exe
|
||||
dist/yt-dlp_win.zip
|
||||
compression-level: 0
|
||||
|
||||
windows32:
|
||||
needs: process
|
||||
@ -376,14 +387,15 @@ jobs:
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.8"
|
||||
architecture: "x86"
|
||||
- name: Install Requirements
|
||||
run: |
|
||||
python -m pip install -U pip setuptools wheel
|
||||
pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.8.0-py3-none-any.whl" -r requirements.txt
|
||||
python devscripts/install_deps.py -o --include build
|
||||
python devscripts/install_deps.py
|
||||
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.8.0-py3-none-any.whl"
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
@ -391,7 +403,7 @@ jobs:
|
||||
python devscripts/make_lazy_extractors.py
|
||||
- name: Build
|
||||
run: |
|
||||
python pyinst.py
|
||||
python -m bundle.pyinstaller
|
||||
|
||||
- name: Verify --update-to
|
||||
if: vars.UPDATE_TO_VERIFICATION
|
||||
@ -407,10 +419,12 @@ jobs:
|
||||
}
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-${{ github.job }}
|
||||
path: |
|
||||
dist/yt-dlp_x86.exe
|
||||
compression-level: 0
|
||||
|
||||
meta_files:
|
||||
if: inputs.meta_files && always() && !cancelled()
|
||||
@ -424,7 +438,11 @@ jobs:
|
||||
- windows32
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/download-artifact@v3
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: artifact
|
||||
pattern: build-*
|
||||
merge-multiple: true
|
||||
|
||||
- name: Make SHA2-SUMS files
|
||||
run: |
|
||||
@ -459,8 +477,10 @@ jobs:
|
||||
done
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-${{ github.job }}
|
||||
path: |
|
||||
SHA*SUMS*
|
||||
_update_spec
|
||||
SHA*SUMS*
|
||||
compression-level: 0
|
||||
|
4
.github/workflows/core.yml
vendored
4
.github/workflows/core.yml
vendored
@ -49,11 +49,11 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install test requirements
|
||||
run: pip install pytest -r requirements.txt
|
||||
run: python3 ./devscripts/install_deps.py --include dev
|
||||
- name: Run tests
|
||||
continue-on-error: False
|
||||
run: |
|
||||
|
8
.github/workflows/download.yml
vendored
8
.github/workflows/download.yml
vendored
@ -11,11 +11,11 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: 3.9
|
||||
- name: Install test requirements
|
||||
run: pip install pytest -r requirements.txt
|
||||
run: python3 ./devscripts/install_deps.py --include dev
|
||||
- name: Run tests
|
||||
continue-on-error: true
|
||||
run: python3 ./devscripts/run_tests.py download
|
||||
@ -38,11 +38,11 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install test requirements
|
||||
run: pip install pytest -r requirements.txt
|
||||
run: python3 ./devscripts/install_deps.py --include dev
|
||||
- name: Run tests
|
||||
continue-on-error: true
|
||||
run: python3 ./devscripts/run_tests.py download
|
||||
|
10
.github/workflows/quick-test.yml
vendored
10
.github/workflows/quick-test.yml
vendored
@ -11,11 +11,11 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python 3.8
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.8'
|
||||
- name: Install test requirements
|
||||
run: pip install pytest -r requirements.txt
|
||||
run: python3 ./devscripts/install_deps.py --include dev
|
||||
- name: Run tests
|
||||
run: |
|
||||
python3 -m yt_dlp -v || true
|
||||
@ -26,10 +26,10 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v4
|
||||
- uses: actions/setup-python@v5
|
||||
- name: Install flake8
|
||||
run: pip install flake8
|
||||
run: python3 ./devscripts/install_deps.py -o --include dev
|
||||
- name: Make lazy extractors
|
||||
run: python devscripts/make_lazy_extractors.py
|
||||
run: python3 ./devscripts/make_lazy_extractors.py
|
||||
- name: Run flake8
|
||||
run: flake8 .
|
||||
|
6
.github/workflows/release-master.yml
vendored
6
.github/workflows/release-master.yml
vendored
@ -6,8 +6,10 @@ on:
|
||||
paths:
|
||||
- "yt_dlp/**.py"
|
||||
- "!yt_dlp/version.py"
|
||||
- "setup.py"
|
||||
- "pyinst.py"
|
||||
- "bundle/*.py"
|
||||
- "pyproject.toml"
|
||||
- "Makefile"
|
||||
- ".github/workflows/build.yml"
|
||||
concurrency:
|
||||
group: release-master
|
||||
permissions:
|
||||
|
9
.github/workflows/release-nightly.yml
vendored
9
.github/workflows/release-nightly.yml
vendored
@ -18,7 +18,14 @@ jobs:
|
||||
- name: Check for new commits
|
||||
id: check_for_new_commits
|
||||
run: |
|
||||
relevant_files=("yt_dlp/*.py" ':!yt_dlp/version.py' "setup.py" "pyinst.py")
|
||||
relevant_files=(
|
||||
"yt_dlp/*.py"
|
||||
':!yt_dlp/version.py'
|
||||
"bundle/*.py"
|
||||
"pyproject.toml"
|
||||
"Makefile"
|
||||
".github/workflows/build.yml"
|
||||
)
|
||||
echo "commit=$(git log --format=%H -1 --since="24 hours ago" -- "${relevant_files[@]}")" | tee "$GITHUB_OUTPUT"
|
||||
|
||||
release:
|
||||
|
26
.github/workflows/release.yml
vendored
26
.github/workflows/release.yml
vendored
@ -71,7 +71,7 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: actions/setup-python@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
|
||||
@ -246,15 +246,16 @@ jobs:
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
|
||||
- name: Install Requirements
|
||||
run: |
|
||||
sudo apt -y install pandoc man
|
||||
python -m pip install -U pip setuptools wheel twine
|
||||
python -m pip install -U -r requirements.txt
|
||||
python devscripts/install_deps.py -o --include build
|
||||
|
||||
- name: Prepare
|
||||
env:
|
||||
@ -266,14 +267,19 @@ jobs:
|
||||
run: |
|
||||
python devscripts/update-version.py -c "${{ env.channel }}" -r "${{ env.target_repo }}" -s "${{ env.suffix }}" "${{ env.version }}"
|
||||
python devscripts/make_lazy_extractors.py
|
||||
sed -i -E "s/(name=')[^']+(', # package name)/\1${{ env.pypi_project }}\2/" setup.py
|
||||
sed -i -E '0,/(name = ")[^"]+(")/s//\1${{ env.pypi_project }}\2/' pyproject.toml
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
rm -rf dist/*
|
||||
make pypi-files
|
||||
printf '%s\n\n' \
|
||||
'Official repository: <https://github.com/yt-dlp/yt-dlp>' \
|
||||
'**PS**: Some links in this document will not work since this is a copy of the README.md from Github' > ./README.md.new
|
||||
cat ./README.md >> ./README.md.new && mv -f ./README.md.new ./README.md
|
||||
python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update"
|
||||
python setup.py sdist bdist_wheel
|
||||
make clean-cache
|
||||
python -m build --no-isolation .
|
||||
|
||||
- name: Publish to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
@ -290,8 +296,12 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- uses: actions/download-artifact@v3
|
||||
- uses: actions/setup-python@v4
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: artifact
|
||||
pattern: build-*
|
||||
merge-multiple: true
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
|
||||
|
10
MANIFEST.in
10
MANIFEST.in
@ -1,10 +0,0 @@
|
||||
include AUTHORS
|
||||
include Changelog.md
|
||||
include LICENSE
|
||||
include README.md
|
||||
include completions/*/*
|
||||
include supportedsites.md
|
||||
include yt-dlp.1
|
||||
include requirements.txt
|
||||
recursive-include devscripts *
|
||||
recursive-include test *
|
61
Makefile
61
Makefile
@ -6,11 +6,11 @@ doc: README.md CONTRIBUTING.md issuetemplates supportedsites
|
||||
ot: offlinetest
|
||||
tar: yt-dlp.tar.gz
|
||||
|
||||
# Keep this list in sync with MANIFEST.in
|
||||
# Keep this list in sync with pyproject.toml includes/artifacts
|
||||
# intended use: when building a source distribution,
|
||||
# make pypi-files && python setup.py sdist
|
||||
# make pypi-files && python3 -m build -sn .
|
||||
pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \
|
||||
completions yt-dlp.1 requirements.txt setup.cfg devscripts/* test/*
|
||||
completions yt-dlp.1 pyproject.toml setup.cfg devscripts/* test/*
|
||||
|
||||
.PHONY: all clean install test tar pypi-files completions ot offlinetest codetest supportedsites
|
||||
|
||||
@ -21,7 +21,7 @@ clean-test:
|
||||
*.mp4 *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp
|
||||
clean-dist:
|
||||
rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \
|
||||
yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS .mailmap
|
||||
yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS
|
||||
clean-cache:
|
||||
find . \( \
|
||||
-type d -name .pytest_cache -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \
|
||||
@ -37,12 +37,15 @@ BINDIR ?= $(PREFIX)/bin
|
||||
MANDIR ?= $(PREFIX)/man
|
||||
SHAREDIR ?= $(PREFIX)/share
|
||||
PYTHON ?= /usr/bin/env python3
|
||||
GNUTAR ?= tar
|
||||
|
||||
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
|
||||
SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi)
|
||||
|
||||
# set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2
|
||||
MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi)
|
||||
# set markdown input format to "markdown-smart" for pandoc version 2+ and to "markdown" for pandoc prior to version 2
|
||||
PANDOC_VERSION_CMD = pandoc -v 2>/dev/null | head -n1 | cut -d' ' -f2 | head -c1
|
||||
PANDOC_VERSION != $(PANDOC_VERSION_CMD)
|
||||
PANDOC_VERSION ?= $(shell $(PANDOC_VERSION_CMD))
|
||||
MARKDOWN_CMD = if [ "$(PANDOC_VERSION)" = "1" -o "$(PANDOC_VERSION)" = "0" ]; then echo markdown; else echo markdown-smart; fi
|
||||
MARKDOWN != $(MARKDOWN_CMD)
|
||||
MARKDOWN ?= $(shell $(MARKDOWN_CMD))
|
||||
|
||||
install: lazy-extractors yt-dlp yt-dlp.1 completions
|
||||
mkdir -p $(DESTDIR)$(BINDIR)
|
||||
@ -73,24 +76,28 @@ test:
|
||||
offlinetest: codetest
|
||||
$(PYTHON) -m pytest -k "not download"
|
||||
|
||||
# XXX: This is hard to maintain
|
||||
CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat yt_dlp/compat/urllib yt_dlp/utils yt_dlp/dependencies yt_dlp/networking
|
||||
yt-dlp: yt_dlp/*.py yt_dlp/*/*.py
|
||||
CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's,/__init__.py,,' | grep -v '/__' | sort
|
||||
CODE_FOLDERS != $(CODE_FOLDERS_CMD)
|
||||
CODE_FOLDERS ?= $(shell $(CODE_FOLDERS_CMD))
|
||||
CODE_FILES_CMD = for f in $(CODE_FOLDERS) ; do echo "$$f" | sed 's,$$,/*.py,' ; done
|
||||
CODE_FILES != $(CODE_FILES_CMD)
|
||||
CODE_FILES ?= $(shell $(CODE_FILES_CMD))
|
||||
yt-dlp: $(CODE_FILES)
|
||||
mkdir -p zip
|
||||
for d in $(CODE_FOLDERS) ; do \
|
||||
mkdir -p zip/$$d ;\
|
||||
cp -pPR $$d/*.py zip/$$d/ ;\
|
||||
done
|
||||
touch -t 200001010101 zip/yt_dlp/*.py zip/yt_dlp/*/*.py
|
||||
(cd zip && touch -t 200001010101 $(CODE_FILES))
|
||||
mv zip/yt_dlp/__main__.py zip/
|
||||
cd zip ; zip -q ../yt-dlp yt_dlp/*.py yt_dlp/*/*.py __main__.py
|
||||
(cd zip && zip -q ../yt-dlp $(CODE_FILES) __main__.py)
|
||||
rm -rf zip
|
||||
echo '#!$(PYTHON)' > yt-dlp
|
||||
cat yt-dlp.zip >> yt-dlp
|
||||
rm yt-dlp.zip
|
||||
chmod a+x yt-dlp
|
||||
|
||||
README.md: yt_dlp/*.py yt_dlp/*/*.py devscripts/make_readme.py
|
||||
README.md: $(CODE_FILES) devscripts/make_readme.py
|
||||
COLUMNS=80 $(PYTHON) yt_dlp/__main__.py --ignore-config --help | $(PYTHON) devscripts/make_readme.py
|
||||
|
||||
CONTRIBUTING.md: README.md devscripts/make_contributing.py
|
||||
@ -115,24 +122,26 @@ yt-dlp.1: README.md devscripts/prepare_manpage.py
|
||||
pandoc -s -f $(MARKDOWN) -t man yt-dlp.1.temp.md -o yt-dlp.1
|
||||
rm -f yt-dlp.1.temp.md
|
||||
|
||||
completions/bash/yt-dlp: yt_dlp/*.py yt_dlp/*/*.py devscripts/bash-completion.in
|
||||
completions/bash/yt-dlp: $(CODE_FILES) devscripts/bash-completion.in
|
||||
mkdir -p completions/bash
|
||||
$(PYTHON) devscripts/bash-completion.py
|
||||
|
||||
completions/zsh/_yt-dlp: yt_dlp/*.py yt_dlp/*/*.py devscripts/zsh-completion.in
|
||||
completions/zsh/_yt-dlp: $(CODE_FILES) devscripts/zsh-completion.in
|
||||
mkdir -p completions/zsh
|
||||
$(PYTHON) devscripts/zsh-completion.py
|
||||
|
||||
completions/fish/yt-dlp.fish: yt_dlp/*.py yt_dlp/*/*.py devscripts/fish-completion.in
|
||||
completions/fish/yt-dlp.fish: $(CODE_FILES) devscripts/fish-completion.in
|
||||
mkdir -p completions/fish
|
||||
$(PYTHON) devscripts/fish-completion.py
|
||||
|
||||
_EXTRACTOR_FILES = $(shell find yt_dlp/extractor -name '*.py' -and -not -name 'lazy_extractors.py')
|
||||
_EXTRACTOR_FILES_CMD = find yt_dlp/extractor -name '*.py' -and -not -name 'lazy_extractors.py'
|
||||
_EXTRACTOR_FILES != $(_EXTRACTOR_FILES_CMD)
|
||||
_EXTRACTOR_FILES ?= $(shell $(_EXTRACTOR_FILES_CMD))
|
||||
yt_dlp/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
|
||||
$(PYTHON) devscripts/make_lazy_extractors.py $@
|
||||
|
||||
yt-dlp.tar.gz: all
|
||||
@tar -czf yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 --group 0 \
|
||||
@$(GNUTAR) -czf yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 --group 0 \
|
||||
--exclude '*.DS_Store' \
|
||||
--exclude '*.kate-swp' \
|
||||
--exclude '*.pyc' \
|
||||
@ -144,12 +153,8 @@ yt-dlp.tar.gz: all
|
||||
-- \
|
||||
README.md supportedsites.md Changelog.md LICENSE \
|
||||
CONTRIBUTING.md Collaborators.md CONTRIBUTORS AUTHORS \
|
||||
Makefile MANIFEST.in yt-dlp.1 README.txt completions \
|
||||
setup.py setup.cfg yt-dlp yt_dlp requirements.txt \
|
||||
devscripts test
|
||||
Makefile yt-dlp.1 README.txt completions .gitignore \
|
||||
setup.cfg yt-dlp yt_dlp pyproject.toml devscripts test
|
||||
|
||||
AUTHORS: .mailmap
|
||||
git shortlog -s -n | cut -f2 | sort > AUTHORS
|
||||
|
||||
.mailmap:
|
||||
git shortlog -s -e -n | awk '!(out[$$NF]++) { $$1="";sub(/^[ \t]+/,""); print}' > .mailmap
|
||||
AUTHORS:
|
||||
git shortlog -s -n HEAD | cut -f2 | sort > AUTHORS
|
||||
|
52
README.md
52
README.md
@ -167,7 +167,8 @@ For ease of use, a few more compat options are available:
|
||||
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx`
|
||||
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx`
|
||||
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
|
||||
* `--compat-options 2022`: Same as `--compat-options playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`. Use this to enable all future compat options
|
||||
* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
|
||||
* `--compat-options 2023`: Currently does nothing. Use this to enable all future compat options
|
||||
|
||||
|
||||
# INSTALLATION
|
||||
@ -280,7 +281,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly
|
||||
|
||||
* [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. License [depends on the build](https://www.ffmpeg.org/legal.html)
|
||||
|
||||
There are bugs in ffmpeg that causes various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for some of these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds
|
||||
There are bugs in ffmpeg that cause various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for some of these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds
|
||||
|
||||
**Important**: What you need is the ffmpeg *binary*, **NOT** [the Python package of the same name](https://pypi.org/project/ffmpeg)
|
||||
|
||||
@ -320,19 +321,21 @@ If you do not have the necessary dependencies for a task you are attempting, yt-
|
||||
## COMPILE
|
||||
|
||||
### Standalone PyInstaller Builds
|
||||
To build the standalone executable, you must have Python and `pyinstaller` (plus any of yt-dlp's [optional dependencies](#dependencies) if needed). Once you have all the necessary dependencies installed, simply run `pyinst.py`. The executable will be built for the same architecture (x86/ARM, 32/64 bit) as the Python used.
|
||||
To build the standalone executable, you must have Python and `pyinstaller` (plus any of yt-dlp's [optional dependencies](#dependencies) if needed). The executable will be built for the same architecture (x86/ARM, 32/64 bit) as the Python used. You can run the following commands:
|
||||
|
||||
python3 -m pip install -U pyinstaller -r requirements.txt
|
||||
python3 devscripts/make_lazy_extractors.py
|
||||
python3 pyinst.py
|
||||
```
|
||||
python3 devscripts/install_deps.py --include pyinstaller
|
||||
python3 devscripts/make_lazy_extractors.py
|
||||
python3 -m bundle.pyinstaller
|
||||
```
|
||||
|
||||
On some systems, you may need to use `py` or `python` instead of `python3`.
|
||||
|
||||
`pyinst.py` accepts any arguments that can be passed to `pyinstaller`, such as `--onefile/-F` or `--onedir/-D`, which is further [documented here](https://pyinstaller.org/en/stable/usage.html#what-to-generate).
|
||||
`bundle/pyinstaller.py` accepts any arguments that can be passed to `pyinstaller`, such as `--onefile/-F` or `--onedir/-D`, which are further [documented here](https://pyinstaller.org/en/stable/usage.html#what-to-generate).
|
||||
|
||||
**Note**: Pyinstaller versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment.
|
||||
|
||||
**Important**: Running `pyinstaller` directly **without** using `pyinst.py` is **not** officially supported. This may or may not work correctly.
|
||||
**Important**: Running `pyinstaller` directly **without** using `bundle/pyinstaller.py` is **not** officially supported. This may or may not work correctly.
|
||||
|
||||
### Platform-independent Binary (UNIX)
|
||||
You will need the build tools `python` (3.8+), `zip`, `make` (GNU), `pandoc`\* and `pytest`\*.
|
||||
@ -345,14 +348,17 @@ You can also run `make yt-dlp` instead to compile only the binary without updati
|
||||
|
||||
While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and need VC++14** on the target computer to run.
|
||||
|
||||
If you wish to build it anyway, install Python and py2exe, and then simply run `setup.py py2exe`
|
||||
If you wish to build it anyway, install Python (if it is not already installed) and you can run the following commands:
|
||||
|
||||
py -m pip install -U py2exe -r requirements.txt
|
||||
py devscripts/make_lazy_extractors.py
|
||||
py setup.py py2exe
|
||||
```
|
||||
py devscripts/install_deps.py --include py2exe
|
||||
py devscripts/make_lazy_extractors.py
|
||||
py -m bundle.py2exe
|
||||
```
|
||||
|
||||
### Related scripts
|
||||
|
||||
* **`devscripts/install_deps.py`** - Install dependencies for yt-dlp.
|
||||
* **`devscripts/update-version.py`** - Update the version number based on current date.
|
||||
* **`devscripts/set-variant.py`** - Set the build variant of the executable.
|
||||
* **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file.
|
||||
@ -1305,7 +1311,8 @@ The available fields are:
|
||||
- `display_id` (string): An alternative identifier for the video
|
||||
- `uploader` (string): Full name of the video uploader
|
||||
- `license` (string): License name the video is licensed under
|
||||
- `creator` (string): The creator of the video
|
||||
- `creators` (list): The creators of the video
|
||||
- `creator` (string): The creators of the video; comma-separated
|
||||
- `timestamp` (numeric): UNIX timestamp of the moment the video became available
|
||||
- `upload_date` (string): Video upload date in UTC (YYYYMMDD)
|
||||
- `release_timestamp` (numeric): UNIX timestamp of the moment the video was released
|
||||
@ -1380,11 +1387,15 @@ Available for the media that is a track or a part of a music album:
|
||||
- `track_number` (numeric): Number of the track within an album or a disc
|
||||
- `track_id` (string): Id of the track
|
||||
- `artists` (list): Artist(s) of the track
|
||||
- `composers` (list): Composer(s) of the piece
|
||||
- `artist` (string): Artist(s) of the track; comma-separated
|
||||
- `genres` (list): Genre(s) of the track
|
||||
- `genre` (string): Genre(s) of the track; comma-separated
|
||||
- `composers` (list): Composer(s) of the piece
|
||||
- `composer` (string): Composer(s) of the piece; comma-separated
|
||||
- `album` (string): Title of the album the track belongs to
|
||||
- `album_type` (string): Type of the album
|
||||
- `album_artists` (list): List of all artists appeared on the album
|
||||
- `album_artists` (list): All artists that appeared on the album
|
||||
- `album_artist` (string): All artists that appeared on the album; comma-separated
|
||||
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
|
||||
|
||||
Available only when using `--download-sections` and for `chapter:` prefix when using `--split-chapters` for videos with internal chapters:
|
||||
@ -1762,11 +1773,11 @@ Metadata fields | From
|
||||
`description`, `synopsis` | `description`
|
||||
`purl`, `comment` | `webpage_url`
|
||||
`track` | `track_number`
|
||||
`artist` | `artists`, `creator`, `uploader` or `uploader_id`
|
||||
`composer` | `composers`
|
||||
`genre` | `genres`
|
||||
`artist` | `artist`, `artists`, `creator`, `creators`, `uploader` or `uploader_id`
|
||||
`composer` | `composer` or `composers`
|
||||
`genre` | `genre` or `genres`
|
||||
`album` | `album`
|
||||
`album_artist` | `album_artists`
|
||||
`album_artist` | `album_artist` or `album_artists`
|
||||
`disc` | `disc_number`
|
||||
`show` | `series`
|
||||
`season_number` | `season_number`
|
||||
@ -1890,6 +1901,9 @@ The following extractors use this feature:
|
||||
#### nflplusreplay
|
||||
* `type`: Type(s) of game replays to extract. Valid types are: `full_game`, `full_game_spanish`, `condensed_game` and `all_22`. You can use `all` to extract all available replay types, which is the default
|
||||
|
||||
#### jiosaavn
|
||||
* `bitrate`: Audio bitrates to request. One or more of `16`, `32`, `64`, `128`, `320`. Default is `128,320`
|
||||
|
||||
**Note**: These options may be changed/removed in the future without concern for backward compatibility
|
||||
|
||||
<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
|
||||
|
1
bundle/__init__.py
Normal file
1
bundle/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# Empty file
|
59
bundle/py2exe.py
Executable file
59
bundle/py2exe.py
Executable file
@ -0,0 +1,59 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Allow execution from anywhere
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import warnings
|
||||
|
||||
from py2exe import freeze
|
||||
|
||||
from devscripts.utils import read_version
|
||||
|
||||
VERSION = read_version()
|
||||
|
||||
|
||||
def main():
    """Freeze yt-dlp into a console executable using py2exe.

    A warning is emitted first because py2exe builds exclude the
    Crypto/Cryptodome modules and need VC++14 at runtime; the pyinstaller
    bundler (now located at ``bundle/pyinstaller.py``) is the recommended
    alternative.

    Returns:
        Whatever ``py2exe.freeze`` returns.
    """
    warnings.warn(
        'py2exe builds do not support pycryptodomex and needs VC++14 to run. '
        'It is recommended to run "bundle/pyinstaller.py" to build using pyinstaller instead')

    return freeze(
        console=[{
            'script': './yt_dlp/__main__.py',
            'dest_base': 'yt-dlp',
            'icon_resources': [(1, 'devscripts/logo.ico')],
        }],
        version_info={
            'version': VERSION,
            'description': 'A youtube-dl fork with additional features and patches',
            'comments': 'Official repository: <https://github.com/yt-dlp/yt-dlp>',
            'product_name': 'yt-dlp',
            'product_version': VERSION,
        },
        options={
            'bundle_files': 0,
            'compressed': 1,
            'optimize': 2,
            'dist_dir': './dist',
            'excludes': [
                # py2exe cannot import Crypto
                'Crypto',
                'Cryptodome',
                # py2exe appears to confuse this with our socks library.
                # We don't use pysocks and urllib3.contrib.socks would fail to import if tried.
                'urllib3.contrib.socks'
            ],
            'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
            # Modules that are only imported dynamically must be added here
            'includes': ['yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated',
                         'yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated'],
        },
        zipfile=None,
    )
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
2
pyinst.py → bundle/pyinstaller.py
Normal file → Executable file
2
pyinst.py → bundle/pyinstaller.py
Normal file → Executable file
@ -4,7 +4,7 @@
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import platform
|
||||
|
66
devscripts/install_deps.py
Executable file
66
devscripts/install_deps.py
Executable file
@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Allow execution from anywhere
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
from devscripts.tomlparse import parse_toml
|
||||
from devscripts.utils import read_file
|
||||
|
||||
|
||||
def parse_args():
    """Build the command line interface and parse ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` with ``input``, ``exclude``, ``include``,
        ``only_optional``, ``print`` and ``user`` attributes.
    """
    cli = argparse.ArgumentParser(description='Install dependencies for yt-dlp')
    cli.add_argument(
        'input', nargs='?', metavar='TOMLFILE', default='pyproject.toml',
        help='Input file (default: %(default)s)')

    # Repeatable options that collect their values into a list
    repeatable = (
        (('-e', '--exclude'), 'REQUIREMENT', 'Exclude a required dependency'),
        (('-i', '--include'), 'GROUP', 'Include an optional dependency group'),
    )
    for flags, metavar, description in repeatable:
        cli.add_argument(*flags, metavar=metavar, action='append', help=description)

    # Plain on/off switches
    switches = (
        (('-o', '--only-optional'), 'Only install optional dependencies'),
        (('-p', '--print'), 'Only print a requirements.txt to stdout'),
        (('-u', '--user'), 'Install with pip as --user'),
    )
    for flags, description in switches:
        cli.add_argument(*flags, action='store_true', help=description)

    return cli.parse_args()
|
||||
|
||||
|
||||
def main():
    """Resolve the requested dependency set and print or install it.

    Reads the ``[project]`` table of the TOML input, drops every excluded
    requirement, appends the requested optional groups and then either
    prints one requirement per line (``--print``) or hands the list to
    ``pip install -U``.

    Returns:
        ``None`` when printing, otherwise pip's exit code.
    """
    args = parse_args()
    project = parse_toml(read_file(args.input))['project']
    deps = project['dependencies']
    targets = [] if args.only_optional else deps.copy()

    for unwanted in args.exclude or []:
        for requirement in deps:
            # Bare distribution name, i.e. without version specifiers or markers
            base_name = re.match(r'[\w-]+', requirement)[0]
            if requirement not in targets:
                continue
            if unwanted == requirement or unwanted.lower() == base_name.lower():
                targets.remove(requirement)

    optional_groups = project['optional-dependencies']
    for group_name in args.include or []:
        # Unknown or empty groups contribute nothing
        targets.extend(optional_groups.get(group_name) or [])

    if args.print:
        for requirement in targets:
            print(requirement)
        return

    user_flag = ['--user'] if args.user else []
    command = [sys.executable, '-m', 'pip', 'install', '-U', *user_flag, *targets]
    return subprocess.call(command)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
189
devscripts/tomlparse.py
Executable file
189
devscripts/tomlparse.py
Executable file
@ -0,0 +1,189 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Simple parser for spec compliant toml files
|
||||
|
||||
A simple toml parser for files that comply with the spec.
|
||||
Should only be used to parse `pyproject.toml` for `install_deps.py`.
|
||||
|
||||
IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED!
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import json
|
||||
import re
|
||||
|
||||
WS = r'(?:[\ \t]*)'
|
||||
STRING_RE = re.compile(r'"(?:\\.|[^\\"\n])*"|\'[^\'\n]*\'')
|
||||
SINGLE_KEY_RE = re.compile(rf'{STRING_RE.pattern}|[A-Za-z0-9_-]+')
|
||||
KEY_RE = re.compile(rf'{WS}(?:{SINGLE_KEY_RE.pattern}){WS}(?:\.{WS}(?:{SINGLE_KEY_RE.pattern}){WS})*')
|
||||
EQUALS_RE = re.compile(rf'={WS}')
|
||||
WS_RE = re.compile(WS)
|
||||
|
||||
_SUBTABLE = rf'(?P<subtable>^\[(?P<is_list>\[)?(?P<path>{KEY_RE.pattern})\]\]?)'
|
||||
EXPRESSION_RE = re.compile(rf'^(?:{_SUBTABLE}|{KEY_RE.pattern}=)', re.MULTILINE)
|
||||
|
||||
LIST_WS_RE = re.compile(rf'{WS}((#[^\n]*)?\n{WS})*')
|
||||
LEFTOVER_VALUE_RE = re.compile(r'[^,}\]\t\n#]+')
|
||||
|
||||
|
||||
def parse_key(value: str):
    """Yield the individual components of a (possibly dotted) TOML key.

    Double-quoted components are decoded with ``json.loads``, single-quoted
    components have their quotes stripped, and bare components are yielded
    unchanged.
    """
    for found in SINGLE_KEY_RE.finditer(value):
        token = found[0]
        opener = token[0]
        if opener == '"':
            yield json.loads(token)
            continue
        yield token[1:-1] if opener == '\'' else token
|
||||
|
||||
|
||||
def get_target(root: dict, paths: list[str], is_list=False):
    """Walk ``paths`` down from ``root`` and return the table found there.

    Missing keys are created on the way. When ``is_list`` is true, the last
    path component refers to a list of tables: a new empty table is appended
    to it and returned. A list met at any other position is entered through
    its last element.
    """
    node = root
    depth = len(paths)

    for position, name in enumerate(paths, 1):
        wants_list = is_list and position == depth
        child = node.get(name)
        if child is None:
            child = node[name] = [] if wants_list else {}

        if isinstance(child, dict):
            node = child
            continue

        if wants_list:
            # Start a fresh entry in the list of tables
            node = {}
            child.append(node)
        else:
            # Descend into the most recently declared entry
            node = child[-1]

    assert isinstance(node, dict)
    return node
|
||||
|
||||
|
||||
def parse_enclosed(data: str, index: int, end: str, ws_re: re.Pattern):
    """Drive the parsing of a delimited, comma-separated construct.

    This is a generator-based coroutine. ``index`` points at the opening
    delimiter. For every element it yields ``(True, start)`` and expects the
    caller to ``send`` back the index just past that element. Once the
    closing ``end`` character is reached it yields ``(False, index_after_end)``.
    Whitespace as defined by ``ws_re`` is skipped around elements and commas.
    """
    def skip_ws(position):
        found = ws_re.match(data, position)
        return found.end() if found else position

    index = skip_ws(index + 1)

    while data[index] != end:
        index = skip_ws((yield True, index))

        if data[index] == ',':
            index += 1

        index = skip_ws(index)

    assert data[index] == end
    yield False, index + 1
|
||||
|
||||
|
||||
def parse_value(data: str, index: int):
    """Parse the TOML value starting at ``data[index]``.

    Handles arrays (``[...]``), inline tables (``{...}``), single-line
    strings and bare scalars.

    Returns:
        A ``(end_index, value)`` tuple where ``end_index`` is the position
        just past the parsed value.
    """
    if data[index] == '[':
        result = []

        # parse_enclosed yields the start of each element and is sent back
        # the index where that element ended
        indices = parse_enclosed(data, index, ']', LIST_WS_RE)
        valid, index = next(indices)
        while valid:
            index, value = parse_value(data, index)
            result.append(value)
            valid, index = indices.send(index)

        return index, result

    if data[index] == '{':
        result = {}

        # Inline tables use WS_RE, which does not match newlines
        indices = parse_enclosed(data, index, '}', WS_RE)
        valid, index = next(indices)
        while valid:
            # parse_kv_pair stores the pair in `result` and returns the end index
            valid, index = indices.send(parse_kv_pair(data, index, result))

        return index, result

    if match := STRING_RE.match(data, index):
        # Double-quoted strings are decoded via JSON; single-quoted ones are taken literally
        return match.end(), json.loads(match[0]) if match[0][0] == '"' else match[0][1:-1]

    match = LEFTOVER_VALUE_RE.match(data, index)
    assert match
    value = match[0].strip()
    # Try each converter in order and keep the first one that does not raise.
    # The final dict lookup never raises for a string, so any text that is
    # not otherwise recognised ends up as None.
    for func in [
        int,
        float,
        datetime.time.fromisoformat,
        datetime.date.fromisoformat,
        datetime.datetime.fromisoformat,
        {'true': True, 'false': False}.get,
    ]:
        try:
            value = func(value)
            break
        except Exception:
            pass

    return match.end(), value
|
||||
|
||||
|
||||
def parse_kv_pair(data: str, index: int, target: dict):
    """Parse one ``key = value`` pair at ``index`` and store it in ``target``.

    Dotted keys create (or reuse) the intermediate tables below ``target``.

    Returns:
        The index just past the parsed value, or ``None`` if no key starts
        at ``index``.
    """
    key_match = KEY_RE.match(data, index)
    if key_match is None:
        return None

    *parents, leaf = parse_key(key_match[0])

    equals = EQUALS_RE.match(data, key_match.end())
    assert equals

    end, parsed = parse_value(data, equals.end())
    get_target(target, parents)[leaf] = parsed
    return end
|
||||
|
||||
|
||||
def parse_toml(data: str):
    """Parse TOML text into nested dicts and lists.

    The text is scanned for table headers (``[a.b]`` / ``[[a.b]]``) and
    key/value pairs; each pair is stored in the most recently declared table.

    Returns:
        The root table as a ``dict``.
    """
    document = {}
    current = document
    cursor = 0

    while found := EXPRESSION_RE.search(data, cursor):
        if found['subtable']:
            # A header switches the table that following pairs are written to
            cursor = found.end()
            current = get_target(
                document, list(parse_key(found['path'])), bool(found['is_list']))
        else:
            cursor = parse_kv_pair(data, found.start(), current)
            assert cursor is not None

    return document
|
||||
|
||||
|
||||
def main():
    """Read the TOML file named on the command line and print it as JSON."""
    import argparse
    from pathlib import Path

    def default(obj):
        # Dates and times are not JSON serializable; emit their ISO form
        if isinstance(obj, (datetime.date, datetime.time, datetime.datetime)):
            return obj.isoformat()

    cli = argparse.ArgumentParser()
    cli.add_argument('infile', type=Path, help='The TOML file to read as input')
    infile = cli.parse_args().infile

    text = infile.read_text(encoding='utf-8')
    print(json.dumps(parse_toml(text), default=default))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
121
pyproject.toml
121
pyproject.toml
@ -1,5 +1,118 @@
|
||||
[build-system]
|
||||
build-backend = 'setuptools.build_meta'
|
||||
# https://github.com/yt-dlp/yt-dlp/issues/5941
|
||||
# https://github.com/pypa/distutils/issues/17
|
||||
requires = ['setuptools > 50']
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[project]
|
||||
name = "yt-dlp"
|
||||
maintainers = [
|
||||
{name = "pukkandan", email = "pukkandan.ytdlp@gmail.com"},
|
||||
{name = "Grub4K", email = "contact@grub4k.xyz"},
|
||||
{name = "bashonly", email = "bashonly@protonmail.com"},
|
||||
]
|
||||
description = "A youtube-dl fork with additional features and patches"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.8"
|
||||
keywords = [
|
||||
"youtube-dl",
|
||||
"video-downloader",
|
||||
"youtube-downloader",
|
||||
"sponsorblock",
|
||||
"youtube-dlc",
|
||||
"yt-dlp",
|
||||
]
|
||||
license = {file = "LICENSE"}
|
||||
classifiers = [
|
||||
"Topic :: Multimedia :: Video",
|
||||
"Development Status :: 5 - Production/Stable",
|
||||
"Environment :: Console",
|
||||
"Programming Language :: Python",
|
||||
"Programming Language :: Python :: 3 :: Only",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: Implementation",
|
||||
"Programming Language :: Python :: Implementation :: CPython",
|
||||
"Programming Language :: Python :: Implementation :: PyPy",
|
||||
"License :: OSI Approved :: The Unlicense (Unlicense)",
|
||||
"Operating System :: OS Independent",
|
||||
]
|
||||
dynamic = ["version"]
|
||||
dependencies = [
|
||||
"brotli; implementation_name=='cpython'",
|
||||
"brotlicffi; implementation_name!='cpython'",
|
||||
"certifi",
|
||||
"mutagen",
|
||||
"pycryptodomex",
|
||||
"requests>=2.31.0,<3",
|
||||
"urllib3>=1.26.17,<3",
|
||||
"websockets>=12.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
secretstorage = [
|
||||
"cffi",
|
||||
"secretstorage",
|
||||
]
|
||||
build = [
|
||||
"build",
|
||||
"hatchling",
|
||||
"pip",
|
||||
"wheel",
|
||||
]
|
||||
dev = [
|
||||
"flake8",
|
||||
"isort",
|
||||
"pytest",
|
||||
]
|
||||
pyinstaller = ["pyinstaller>=6.3"]
|
||||
py2exe = ["py2exe>=0.12"]
|
||||
|
||||
[project.urls]
|
||||
Documentation = "https://github.com/yt-dlp/yt-dlp#readme"
|
||||
Repository = "https://github.com/yt-dlp/yt-dlp"
|
||||
Tracker = "https://github.com/yt-dlp/yt-dlp/issues"
|
||||
Funding = "https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators"
|
||||
|
||||
[project.scripts]
|
||||
yt-dlp = "yt_dlp:main"
|
||||
|
||||
[project.entry-points.pyinstaller40]
|
||||
hook-dirs = "yt_dlp.__pyinstaller:get_hook_dirs"
|
||||
|
||||
[tool.hatch.build.targets.sdist]
|
||||
include = [
|
||||
"/yt_dlp",
|
||||
"/devscripts",
|
||||
"/test",
|
||||
"/.gitignore", # included by default, needed for auto-excludes
|
||||
"/Changelog.md",
|
||||
"/LICENSE", # included as license
|
||||
"/pyproject.toml", # included by default
|
||||
"/README.md", # included as readme
|
||||
"/setup.cfg",
|
||||
"/supportedsites.md",
|
||||
]
|
||||
artifacts = [
|
||||
"/yt_dlp/extractor/lazy_extractors.py",
|
||||
"/completions",
|
||||
"/AUTHORS", # included by default
|
||||
"/README.txt",
|
||||
"/yt-dlp.1",
|
||||
]
|
||||
|
||||
[tool.hatch.build.targets.wheel]
|
||||
packages = ["yt_dlp"]
|
||||
artifacts = ["/yt_dlp/extractor/lazy_extractors.py"]
|
||||
|
||||
[tool.hatch.build.targets.wheel.shared-data]
|
||||
"completions/bash/yt-dlp" = "share/bash-completion/completions/yt-dlp"
|
||||
"completions/zsh/_yt-dlp" = "share/zsh/site-functions/_yt-dlp"
|
||||
"completions/fish/yt-dlp.fish" = "share/fish/vendor_completions.d/yt-dlp.fish"
|
||||
"README.txt" = "share/doc/yt_dlp/README.txt"
|
||||
"yt-dlp.1" = "share/man/man1/yt-dlp.1"
|
||||
|
||||
[tool.hatch.version]
|
||||
path = "yt_dlp/version.py"
|
||||
pattern = "_pkg_version = '(?P<version>[^']+)'"
|
||||
|
@ -1,8 +0,0 @@
|
||||
mutagen
|
||||
pycryptodomex
|
||||
brotli; implementation_name=='cpython'
|
||||
brotlicffi; implementation_name!='cpython'
|
||||
certifi
|
||||
requests>=2.31.0,<3
|
||||
urllib3>=1.26.17,<3
|
||||
websockets>=12.0
|
@ -1,7 +1,3 @@
|
||||
[wheel]
|
||||
universal = true
|
||||
|
||||
|
||||
[flake8]
|
||||
exclude = build,venv,.tox,.git,.pytest_cache
|
||||
ignore = E402,E501,E731,E741,W503
|
||||
|
183
setup.py
183
setup.py
@ -1,183 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Allow execution from anywhere
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
import subprocess
|
||||
import warnings
|
||||
|
||||
try:
|
||||
from setuptools import Command, find_packages, setup
|
||||
setuptools_available = True
|
||||
except ImportError:
|
||||
from distutils.core import Command, setup
|
||||
setuptools_available = False
|
||||
|
||||
from devscripts.utils import read_file, read_version
|
||||
|
||||
VERSION = read_version(varname='_pkg_version')
|
||||
|
||||
DESCRIPTION = 'A youtube-dl fork with additional features and patches'
|
||||
|
||||
LONG_DESCRIPTION = '\n\n'.join((
|
||||
'Official repository: <https://github.com/yt-dlp/yt-dlp>',
|
||||
'**PS**: Some links in this document will not work since this is a copy of the README.md from Github',
|
||||
read_file('README.md')))
|
||||
|
||||
REQUIREMENTS = read_file('requirements.txt').splitlines()
|
||||
|
||||
|
||||
def packages():
    """Return the list of packages to distribute.

    Without setuptools a fixed list is returned; otherwise the packages are
    discovered with ``find_packages``.
    """
    if not setuptools_available:
        return [
            'yt_dlp', 'yt_dlp.extractor', 'yt_dlp.downloader', 'yt_dlp.postprocessor', 'yt_dlp.compat',
        ]

    excluded = ('youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts')
    return find_packages(exclude=excluded)
|
||||
|
||||
|
||||
def py2exe_params():
    """Return the keyword arguments used for a py2exe build.

    A warning recommending pyinstaller is emitted before the parameters
    are assembled.
    """
    warnings.warn(
        'py2exe builds do not support pycryptodomex and needs VC++14 to run. '
        'It is recommended to run "pyinst.py" to build using pyinstaller instead')

    console_target = {
        'script': './yt_dlp/__main__.py',
        'dest_base': 'yt-dlp',
        'icon_resources': [(1, 'devscripts/logo.ico')],
    }

    version_info = {
        'version': VERSION,
        'description': DESCRIPTION,
        'comments': LONG_DESCRIPTION.split('\n')[0],
        'product_name': 'yt-dlp',
        'product_version': VERSION,
    }

    excluded_modules = [
        # py2exe cannot import Crypto
        'Crypto',
        'Cryptodome',
        # py2exe appears to confuse this with our socks library.
        # We don't use pysocks and urllib3.contrib.socks would fail to import if tried.
        'urllib3.contrib.socks'
    ]

    # Modules that are only imported dynamically must be added here
    dynamic_modules = ['yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated',
                       'yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated']

    build_options = {
        'bundle_files': 0,
        'compressed': 1,
        'optimize': 2,
        'dist_dir': './dist',
        'excludes': excluded_modules,
        'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
        'includes': dynamic_modules,
    }

    return {
        'console': [console_target],
        'version_info': version_info,
        'options': build_options,
        'zipfile': None,
    }
|
||||
|
||||
|
||||
def build_params():
    """Return the ``setup()`` keyword arguments for a regular build.

    Data files that do not exist on disk are skipped with a warning. The
    entry points are declared through setuptools when it is available and
    through a plain script otherwise.
    """
    install_layout = {
        'share/bash-completion/completions': ['completions/bash/yt-dlp'],
        'share/zsh/site-functions': ['completions/zsh/_yt-dlp'],
        'share/fish/vendor_completions.d': ['completions/fish/yt-dlp.fish'],
        'share/doc/yt_dlp': ['README.txt'],
        'share/man/man1': ['yt-dlp.1'],
    }

    data_files = []
    for dirname, candidates in install_layout.items():
        present = []
        for fn in candidates:
            if os.path.exists(fn):
                present.append(fn)
                continue
            warnings.warn(f'Skipping file {fn} since it is not present. Try running " make pypi-files " first')
        data_files.append((dirname, present))

    if setuptools_available:
        launcher = {
            'entry_points': {
                'console_scripts': ['yt-dlp = yt_dlp:main'],
                'pyinstaller40': ['hook-dirs = yt_dlp.__pyinstaller:get_hook_dirs'],
            },
        }
    else:
        launcher = {'scripts': ['yt-dlp']}

    return {'data_files': data_files, **launcher}
|
||||
|
||||
|
||||
class build_lazy_extractors(Command):
    """Custom setup command that runs ``devscripts/make_lazy_extractors.py``."""
    description = 'Build the extractor lazy loading module'
    # This command takes no options
    user_options = []

    def initialize_options(self):
        # Nothing to initialize; the command has no options
        pass

    def finalize_options(self):
        # Nothing to finalize; the command has no options
        pass

    def run(self):
        """Run the generator script with the current interpreter, unless in dry-run mode."""
        if self.dry_run:
            print('Skipping build of lazy extractors in dry run mode')
            return
        # NOTE: the result of the subprocess is not checked here
        subprocess.run([sys.executable, 'devscripts/make_lazy_extractors.py'])
|
||||
|
||||
|
||||
def main():
    """Entry point of the setup script.

    With ``py2exe`` as the first command line argument, the build is
    delegated to ``py2exe.freeze`` when that function can be imported.
    For py2exe versions without ``freeze`` the parameters are reshaped and
    passed on to ``setup()`` instead. Any other invocation performs a
    regular ``setup()`` call using ``build_params()``.
    """
    if sys.argv[1:2] == ['py2exe']:
        params = py2exe_params()
        try:
            from py2exe import freeze
        except ImportError:
            # No `freeze` available: importing py2exe still has to succeed,
            # otherwise the ImportError propagates to the caller
            import py2exe  # noqa: F401
            warnings.warn('You are using an outdated version of py2exe. Support for this version will be removed in the future')
            # Reshape the parameters: version info is merged into the console
            # target and the options are nested under the 'py2exe' key
            params['console'][0].update(params.pop('version_info'))
            params['options'] = {'py2exe': params.pop('options')}
        else:
            # `freeze` performs the whole build, so setup() is not called
            return freeze(**params)
    else:
        params = build_params()

    setup(
        name='yt-dlp',  # package name (do not change/remove comment)
        version=VERSION,
        maintainer='pukkandan',
        maintainer_email='pukkandan.ytdlp@gmail.com',
        description=DESCRIPTION,
        long_description=LONG_DESCRIPTION,
        long_description_content_type='text/markdown',
        url='https://github.com/yt-dlp/yt-dlp',
        packages=packages(),
        install_requires=REQUIREMENTS,
        python_requires='>=3.8',
        project_urls={
            'Documentation': 'https://github.com/yt-dlp/yt-dlp#readme',
            'Source': 'https://github.com/yt-dlp/yt-dlp',
            'Tracker': 'https://github.com/yt-dlp/yt-dlp/issues',
            'Funding': 'https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators',
        },
        classifiers=[
            'Topic :: Multimedia :: Video',
            'Development Status :: 5 - Production/Stable',
            'Environment :: Console',
            'Programming Language :: Python',
            'Programming Language :: Python :: 3.8',
            'Programming Language :: Python :: 3.9',
            'Programming Language :: Python :: 3.10',
            'Programming Language :: Python :: 3.11',
            'Programming Language :: Python :: 3.12',
            'Programming Language :: Python :: Implementation',
            'Programming Language :: Python :: Implementation :: CPython',
            'Programming Language :: Python :: Implementation :: PyPy',
            'License :: Public Domain',
            'Operating System :: OS Independent',
        ],
        cmdclass={'build_lazy_extractors': build_lazy_extractors},
        **params
    )
|
||||
|
||||
|
||||
main()
|
@ -10,7 +10,7 @@ import types
|
||||
import yt_dlp.extractor
|
||||
from yt_dlp import YoutubeDL
|
||||
from yt_dlp.compat import compat_os_name
|
||||
from yt_dlp.utils import preferredencoding, try_call, write_string
|
||||
from yt_dlp.utils import preferredencoding, try_call, write_string, find_available_port
|
||||
|
||||
if 'pytest' in sys.modules:
|
||||
import pytest
|
||||
@ -223,6 +223,10 @@ def sanitize_got_info_dict(got_dict):
|
||||
if test_info_dict.get('display_id') == test_info_dict.get('id'):
|
||||
test_info_dict.pop('display_id')
|
||||
|
||||
# Remove deprecated fields
|
||||
for old in YoutubeDL._deprecated_multivalue_fields.keys():
|
||||
test_info_dict.pop(old, None)
|
||||
|
||||
# release_year may be generated from release_date
|
||||
if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])):
|
||||
test_info_dict.pop('release_year')
|
||||
@ -329,3 +333,8 @@ def http_server_port(httpd):
|
||||
else:
|
||||
sock = httpd.socket
|
||||
return sock.getsockname()[1]
|
||||
|
||||
|
||||
def verify_address_availability(address):
    """Skip the running test when no port can be found for ``address``."""
    port = find_available_port(address)
    if port is not None:
        return
    pytest.skip(f'Unable to bind to source address {address} (address may not exist)')
|
||||
|
@ -941,7 +941,7 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
def get_videos(filter_=None):
|
||||
ydl = YDL({'match_filter': filter_, 'simulate': True})
|
||||
for v in videos:
|
||||
ydl.process_ie_result(v, download=True)
|
||||
ydl.process_ie_result(v.copy(), download=True)
|
||||
return [v['id'] for v in ydl.downloaded_info_dicts]
|
||||
|
||||
res = get_videos()
|
||||
|
@ -13,6 +13,7 @@ import http.client
|
||||
import http.cookiejar
|
||||
import http.server
|
||||
import io
|
||||
import logging
|
||||
import pathlib
|
||||
import random
|
||||
import ssl
|
||||
@ -26,7 +27,7 @@ import zlib
|
||||
from email.message import Message
|
||||
from http.cookiejar import CookieJar
|
||||
|
||||
from test.helper import FakeYDL, http_server_port
|
||||
from test.helper import FakeYDL, http_server_port, verify_address_availability
|
||||
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||
from yt_dlp.dependencies import brotli, requests, urllib3
|
||||
from yt_dlp.networking import (
|
||||
@ -180,6 +181,12 @@ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
|
||||
self.send_header('Location', '/a/b/./../../headers')
|
||||
self.send_header('Content-Length', '0')
|
||||
self.end_headers()
|
||||
elif self.path == '/redirect_dotsegments_absolute':
|
||||
self.send_response(301)
|
||||
# redirect to /headers but with dot segments before - absolute url
|
||||
self.send_header('Location', f'http://127.0.0.1:{http_server_port(self.server)}/a/b/./../../headers')
|
||||
self.send_header('Content-Length', '0')
|
||||
self.end_headers()
|
||||
elif self.path.startswith('/redirect_'):
|
||||
self._redirect()
|
||||
elif self.path.startswith('/method'):
|
||||
@ -345,16 +352,17 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
|
||||
res.close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_remove_dot_segments(self, handler):
|
||||
with handler() as rh:
|
||||
@pytest.mark.parametrize('path', [
|
||||
'/a/b/./../../headers',
|
||||
'/redirect_dotsegments',
|
||||
# https://github.com/yt-dlp/yt-dlp/issues/9020
|
||||
'/redirect_dotsegments_absolute',
|
||||
])
|
||||
def test_remove_dot_segments(self, handler, path):
|
||||
with handler(verbose=True) as rh:
|
||||
# This isn't a comprehensive test,
|
||||
# but it should be enough to check whether the handler is removing dot segments
|
||||
res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/a/b/./../../headers'))
|
||||
assert res.status == 200
|
||||
assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
|
||||
res.close()
|
||||
|
||||
res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_dotsegments'))
|
||||
# but it should be enough to check whether the handler is removing dot segments in required scenarios
|
||||
res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}{path}'))
|
||||
assert res.status == 200
|
||||
assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
|
||||
res.close()
|
||||
@ -538,6 +546,9 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_source_address(self, handler):
|
||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||
# on some systems these loopback addresses we need for testing may not be available
|
||||
# see: https://github.com/yt-dlp/yt-dlp/issues/8890
|
||||
verify_address_availability(source_address)
|
||||
with handler(source_address=source_address) as rh:
|
||||
data = validate_and_send(
|
||||
rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
|
||||
@ -742,6 +753,25 @@ class TestClientCertificate:
|
||||
})
|
||||
|
||||
|
||||
class TestRequestHandlerMisc:
    """Generic request handler tests unrelated to request sending or validation"""
    @pytest.mark.parametrize('handler,logger_name', [
        ('Requests', 'urllib3'),
        ('Websockets', 'websockets.client'),
        ('Websockets', 'websockets.server')
    ], indirect=['handler'])
    def test_remove_logging_handler(self, handler, logger_name):
        # Logging handlers may hold a reference to a YoutubeDL instance,
        # so closing the request handler has to detach them again
        # See: https://github.com/yt-dlp/yt-dlp/issues/8922
        attached = logging.getLogger(logger_name).handlers
        baseline = len(attached)

        request_handler = handler()
        assert len(attached) == baseline + 1

        request_handler.close()
        assert len(attached) == baseline
|
||||
|
||||
|
||||
class TestUrllibRequestHandler(TestRequestHandlerBase):
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
def test_file_urls(self, handler):
|
||||
@ -817,6 +847,7 @@ class TestUrllibRequestHandler(TestRequestHandlerBase):
|
||||
assert not isinstance(exc_info.value, TransportError)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Requests'], indirect=True)
|
||||
class TestRequestsRequestHandler(TestRequestHandlerBase):
|
||||
@pytest.mark.parametrize('raised,expected', [
|
||||
(lambda: requests.exceptions.ConnectTimeout(), TransportError),
|
||||
@ -833,7 +864,6 @@ class TestRequestsRequestHandler(TestRequestHandlerBase):
|
||||
(lambda: requests.exceptions.RequestException(), RequestError)
|
||||
# (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object
|
||||
])
|
||||
@pytest.mark.parametrize('handler', ['Requests'], indirect=True)
|
||||
def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
|
||||
with handler() as rh:
|
||||
def mock_get_instance(*args, **kwargs):
|
||||
@ -867,7 +897,6 @@ class TestRequestsRequestHandler(TestRequestHandlerBase):
|
||||
'3 bytes read, 5 more expected'
|
||||
),
|
||||
])
|
||||
@pytest.mark.parametrize('handler', ['Requests'], indirect=True)
|
||||
def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
|
||||
from requests.models import Response as RequestsResponse
|
||||
from urllib3.response import HTTPResponse as Urllib3Response
|
||||
@ -886,6 +915,21 @@ class TestRequestsRequestHandler(TestRequestHandlerBase):
|
||||
|
||||
assert exc_info.type is expected
|
||||
|
||||
def test_close(self, handler, monkeypatch):
    """Closing the request handler must close its underlying session."""
    rh = handler()
    session = rh._get_instance(cookiejar=rh.cookiejar)
    real_close = session.close
    close_calls = []

    def spy_close(*args, **kwargs):
        # Record the call, then defer to the real implementation
        close_calls.append((args, kwargs))
        return real_close(*args, **kwargs)

    monkeypatch.setattr(session, 'close', spy_close)
    rh.close()
    assert close_calls
|
||||
|
||||
|
||||
def run_validation(handler, error, req, **handler_kwargs):
|
||||
with handler(**handler_kwargs) as rh:
|
||||
@ -1195,6 +1239,19 @@ class TestRequestDirector:
|
||||
assert director.send(Request('http://')).read() == b''
|
||||
assert director.send(Request('http://', headers={'prefer': '1'})).read() == b'supported'
|
||||
|
||||
def test_close(self, monkeypatch):
    """Closing the director must close the handlers registered with it."""
    director = RequestDirector(logger=FakeLogger())
    director.add_handler(FakeRH(logger=FakeLogger()))
    close_calls = []

    def spy_close(*args, **kwargs):
        close_calls.append((args, kwargs))

    monkeypatch.setattr(director.handlers[FakeRH.RH_KEY], 'close', spy_close)
    director.close()
    assert close_calls
|
||||
|
||||
|
||||
# XXX: do we want to move this to test_YoutubeDL.py?
|
||||
class TestYoutubeDLNetworking:
|
||||
|
@ -8,13 +8,9 @@ import pytest
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import contextlib
|
||||
import io
|
||||
import platform
|
||||
import random
|
||||
import ssl
|
||||
import urllib.error
|
||||
import warnings
|
||||
|
||||
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||
from yt_dlp.dependencies import certifi
|
||||
@ -30,7 +26,6 @@ from yt_dlp.networking._helper import (
|
||||
from yt_dlp.networking.exceptions import (
|
||||
HTTPError,
|
||||
IncompleteRead,
|
||||
_CompatHTTPError,
|
||||
)
|
||||
from yt_dlp.socks import ProxyType
|
||||
from yt_dlp.utils.networking import HTTPHeaderDict
|
||||
@ -179,11 +174,10 @@ class TestNetworkingExceptions:
|
||||
def create_response(status):
|
||||
return Response(fp=io.BytesIO(b'test'), url='http://example.com', headers={'tesT': 'test'}, status=status)
|
||||
|
||||
@pytest.mark.parametrize('http_error_class', [HTTPError, lambda r: _CompatHTTPError(HTTPError(r))])
|
||||
def test_http_error(self, http_error_class):
|
||||
def test_http_error(self):
|
||||
|
||||
response = self.create_response(403)
|
||||
error = http_error_class(response)
|
||||
error = HTTPError(response)
|
||||
|
||||
assert error.status == 403
|
||||
assert str(error) == error.msg == 'HTTP Error 403: Forbidden'
|
||||
@ -194,80 +188,12 @@ class TestNetworkingExceptions:
|
||||
assert data == b'test'
|
||||
assert repr(error) == '<HTTPError 403: Forbidden>'
|
||||
|
||||
@pytest.mark.parametrize('http_error_class', [HTTPError, lambda *args, **kwargs: _CompatHTTPError(HTTPError(*args, **kwargs))])
|
||||
def test_redirect_http_error(self, http_error_class):
|
||||
def test_redirect_http_error(self):
|
||||
response = self.create_response(301)
|
||||
error = http_error_class(response, redirect_loop=True)
|
||||
error = HTTPError(response, redirect_loop=True)
|
||||
assert str(error) == error.msg == 'HTTP Error 301: Moved Permanently (redirect loop detected)'
|
||||
assert error.reason == 'Moved Permanently'
|
||||
|
||||
def test_compat_http_error(self):
|
||||
response = self.create_response(403)
|
||||
error = _CompatHTTPError(HTTPError(response))
|
||||
assert isinstance(error, HTTPError)
|
||||
assert isinstance(error, urllib.error.HTTPError)
|
||||
|
||||
@contextlib.contextmanager
|
||||
def raises_deprecation_warning():
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
warnings.simplefilter('always')
|
||||
yield
|
||||
|
||||
if len(w) == 0:
|
||||
pytest.fail('Did not raise DeprecationWarning')
|
||||
if len(w) > 1:
|
||||
pytest.fail(f'Raised multiple warnings: {w}')
|
||||
|
||||
if not issubclass(w[-1].category, DeprecationWarning):
|
||||
pytest.fail(f'Expected DeprecationWarning, got {w[-1].category}')
|
||||
w.clear()
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.code == 403
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.getcode() == 403
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.hdrs is error.response.headers
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.info() is error.response.headers
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.headers is error.response.headers
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.filename == error.response.url
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.url == error.response.url
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.geturl() == error.response.url
|
||||
|
||||
# Passthrough file operations
|
||||
with raises_deprecation_warning():
|
||||
assert error.read() == b'test'
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert not error.closed
|
||||
|
||||
with raises_deprecation_warning():
|
||||
# Technically Response operations are also passed through, which should not be used.
|
||||
assert error.get_header('test') == 'test'
|
||||
|
||||
# Should not raise a warning
|
||||
error.close()
|
||||
|
||||
@pytest.mark.skipif(
|
||||
platform.python_implementation() == 'PyPy', reason='garbage collector works differently in pypy')
|
||||
def test_compat_http_error_autoclose(self):
|
||||
# Compat HTTPError should not autoclose response
|
||||
response = self.create_response(403)
|
||||
_CompatHTTPError(HTTPError(response))
|
||||
assert not response.closed
|
||||
|
||||
def test_incomplete_read_error(self):
|
||||
error = IncompleteRead(4, 3, cause='test')
|
||||
assert isinstance(error, IncompleteRead)
|
||||
|
@ -25,7 +25,7 @@ from socketserver import (
|
||||
ThreadingTCPServer,
|
||||
)
|
||||
|
||||
from test.helper import http_server_port
|
||||
from test.helper import http_server_port, verify_address_availability
|
||||
from yt_dlp.networking import Request
|
||||
from yt_dlp.networking.exceptions import ProxyError, TransportError
|
||||
from yt_dlp.socks import (
|
||||
@ -326,6 +326,7 @@ class TestSocks4Proxy:
|
||||
def test_ipv4_client_source_address(self, handler, ctx):
|
||||
with ctx.socks_server(Socks4ProxyHandler) as server_address:
|
||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||
verify_address_availability(source_address)
|
||||
with handler(proxies={'all': f'socks4://{server_address}'},
|
||||
source_address=source_address) as rh:
|
||||
response = ctx.socks_info_request(rh)
|
||||
@ -441,6 +442,7 @@ class TestSocks5Proxy:
|
||||
def test_ipv4_client_source_address(self, handler, ctx):
|
||||
with ctx.socks_server(Socks5ProxyHandler) as server_address:
|
||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||
verify_address_availability(source_address)
|
||||
with handler(proxies={'all': f'socks5://{server_address}'}, source_address=source_address) as rh:
|
||||
response = ctx.socks_info_request(rh)
|
||||
assert response['client_address'][0] == source_address
|
||||
|
@ -2340,6 +2340,58 @@ Line 1
|
||||
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
|
||||
msg='function on a `re.Match` should give group name as well')
|
||||
|
||||
# Test xml.etree.ElementTree.Element as input obj
|
||||
etree = xml.etree.ElementTree.fromstring('''<?xml version="1.0"?>
|
||||
<data>
|
||||
<country name="Liechtenstein">
|
||||
<rank>1</rank>
|
||||
<year>2008</year>
|
||||
<gdppc>141100</gdppc>
|
||||
<neighbor name="Austria" direction="E"/>
|
||||
<neighbor name="Switzerland" direction="W"/>
|
||||
</country>
|
||||
<country name="Singapore">
|
||||
<rank>4</rank>
|
||||
<year>2011</year>
|
||||
<gdppc>59900</gdppc>
|
||||
<neighbor name="Malaysia" direction="N"/>
|
||||
</country>
|
||||
<country name="Panama">
|
||||
<rank>68</rank>
|
||||
<year>2011</year>
|
||||
<gdppc>13600</gdppc>
|
||||
<neighbor name="Costa Rica" direction="W"/>
|
||||
<neighbor name="Colombia" direction="E"/>
|
||||
</country>
|
||||
</data>''')
|
||||
self.assertEqual(traverse_obj(etree, ''), etree,
|
||||
msg='empty str key should return the element itself')
|
||||
self.assertEqual(traverse_obj(etree, 'country'), list(etree),
|
||||
msg='str key should lead all children with that tag name')
|
||||
self.assertEqual(traverse_obj(etree, ...), list(etree),
|
||||
msg='`...` as key should return all children')
|
||||
self.assertEqual(traverse_obj(etree, lambda _, x: x[0].text == '4'), [etree[1]],
|
||||
msg='function as key should get element as value')
|
||||
self.assertEqual(traverse_obj(etree, lambda i, _: i == 1), [etree[1]],
|
||||
msg='function as key should get index as key')
|
||||
self.assertEqual(traverse_obj(etree, 0), etree[0],
|
||||
msg='int key should return the nth child')
|
||||
self.assertEqual(traverse_obj(etree, './/neighbor/@name'),
|
||||
['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia'],
|
||||
msg='`@<attribute>` at end of path should give that attribute')
|
||||
self.assertEqual(traverse_obj(etree, '//neighbor/@fail'), [None, None, None, None, None],
|
||||
msg='`@<nonexistant>` at end of path should give `None`')
|
||||
self.assertEqual(traverse_obj(etree, ('//neighbor/@', 2)), {'name': 'Malaysia', 'direction': 'N'},
|
||||
msg='`@` should give the full attribute dict')
|
||||
self.assertEqual(traverse_obj(etree, '//year/text()'), ['2008', '2011', '2011'],
|
||||
msg='`text()` at end of path should give the inner text')
|
||||
self.assertEqual(traverse_obj(etree, '//*[@direction]/@direction'), ['E', 'W', 'N', 'W', 'E'],
|
||||
msg='full python xpath features should be supported')
|
||||
self.assertEqual(traverse_obj(etree, (0, '@name')), 'Liechtenstein',
|
||||
msg='special transformations should act on current element')
|
||||
self.assertEqual(traverse_obj(etree, ('country', 0, ..., 'text()', {int_or_none})), [1, 2008, 141100],
|
||||
msg='special transformations should act on current element')
|
||||
|
||||
def test_http_header_dict(self):
|
||||
headers = HTTPHeaderDict()
|
||||
headers['ytdl-test'] = b'0'
|
||||
|
@ -6,6 +6,8 @@ import sys
|
||||
|
||||
import pytest
|
||||
|
||||
from test.helper import verify_address_availability
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import http.client
|
||||
@ -227,6 +229,7 @@ class TestWebsSocketRequestHandlerConformance:
|
||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
||||
def test_source_address(self, handler):
|
||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||
verify_address_availability(source_address)
|
||||
with handler(source_address=source_address) as rh:
|
||||
ws = validate_and_send(rh, Request(self.ws_base_url))
|
||||
ws.send('source_address')
|
||||
|
@ -40,7 +40,6 @@ from .networking.exceptions import (
|
||||
NoSupportingHandlers,
|
||||
RequestError,
|
||||
SSLError,
|
||||
_CompatHTTPError,
|
||||
network_exceptions,
|
||||
)
|
||||
from .plugins import directories as plugin_directories
|
||||
@ -581,6 +580,13 @@ class YoutubeDL:
|
||||
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
|
||||
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
|
||||
}
|
||||
_deprecated_multivalue_fields = {
|
||||
'album_artist': 'album_artists',
|
||||
'artist': 'artists',
|
||||
'composer': 'composers',
|
||||
'creator': 'creators',
|
||||
'genre': 'genres',
|
||||
}
|
||||
_format_selection_exts = {
|
||||
'audio': set(MEDIA_EXTENSIONS.common_audio),
|
||||
'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
|
||||
@ -2452,7 +2458,7 @@ class YoutubeDL:
|
||||
# for extractors with incomplete formats (audio only (soundcloud)
|
||||
# or video only (imgur)) best/worst will fallback to
|
||||
# best/worst {video,audio}-only format
|
||||
matches = formats
|
||||
matches = list(filter(lambda f: f.get('vcodec') != 'none' or f.get('acodec') != 'none', formats))
|
||||
elif seperate_fallback and not ctx['has_merged_format']:
|
||||
# for compatibility with youtube-dl when there is no pre-merged format
|
||||
matches = list(filter(seperate_fallback, formats))
|
||||
@ -2641,15 +2647,13 @@ class YoutubeDL:
|
||||
if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
|
||||
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
|
||||
|
||||
deprecated_multivalue_fields = {
|
||||
'artist': 'artists',
|
||||
'composer': 'composers',
|
||||
'album_artist': 'album_artists',
|
||||
'genre': 'genres',
|
||||
}
|
||||
for deprecated_field, new_field in deprecated_multivalue_fields.items():
|
||||
if info_dict.get(deprecated_field):
|
||||
info_dict[new_field] = re.split(r', ?', info_dict[deprecated_field])
|
||||
for old_key, new_key in self._deprecated_multivalue_fields.items():
|
||||
if new_key in info_dict and old_key in info_dict:
|
||||
self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
|
||||
elif old_value := info_dict.get(old_key):
|
||||
info_dict[new_key] = old_value.split(', ')
|
||||
elif new_value := info_dict.get(new_key):
|
||||
info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
|
||||
|
||||
def _raise_pending_errors(self, info):
|
||||
err = info.pop('__pending_error', None)
|
||||
@ -3494,7 +3498,8 @@ class YoutubeDL:
|
||||
or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
|
||||
'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
|
||||
FFmpegFixupM3u8PP)
|
||||
ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
|
||||
ffmpeg_fixup(downloader == 'dashsegments'
|
||||
and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
|
||||
'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
|
||||
|
||||
ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
|
||||
@ -4120,8 +4125,6 @@ class YoutubeDL:
|
||||
'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
|
||||
'Try using --legacy-server-connect', cause=e) from e
|
||||
raise
|
||||
except HTTPError as e: # TODO: Remove in a future release
|
||||
raise _CompatHTTPError(e) from e
|
||||
|
||||
def build_request_director(self, handlers, preferences=None):
|
||||
logger = _YDLLogger(self)
|
||||
|
@ -31,4 +31,4 @@ def get_hidden_imports():
|
||||
hiddenimports = list(get_hidden_imports())
|
||||
print(f'Adding imports: {hiddenimports}')
|
||||
|
||||
excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts']
|
||||
excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts', 'bundle']
|
||||
|
@ -35,6 +35,7 @@ from .compat_utils import passthrough_module
|
||||
from ..dependencies import brotli as compat_brotli # noqa: F401
|
||||
from ..dependencies import websockets as compat_websockets # noqa: F401
|
||||
from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401
|
||||
from ..networking.exceptions import HTTPError as compat_HTTPError # noqa: F401
|
||||
|
||||
passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))
|
||||
|
||||
@ -70,7 +71,6 @@ compat_html_parser_HTMLParseError = compat_HTMLParseError
|
||||
compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser
|
||||
compat_http_client = http.client
|
||||
compat_http_server = http.server
|
||||
compat_HTTPError = urllib.error.HTTPError
|
||||
compat_input = input
|
||||
compat_integer_types = (int, )
|
||||
compat_itertools_count = itertools.count
|
||||
@ -88,7 +88,7 @@ compat_struct_unpack = struct.unpack
|
||||
compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL
|
||||
compat_tokenize_tokenize = tokenize.tokenize
|
||||
compat_urllib_error = urllib.error
|
||||
compat_urllib_HTTPError = urllib.error.HTTPError
|
||||
compat_urllib_HTTPError = compat_HTTPError
|
||||
compat_urllib_parse = urllib.parse
|
||||
compat_urllib_parse_parse_qs = urllib.parse.parse_qs
|
||||
compat_urllib_parse_quote = urllib.parse.quote
|
||||
|
@ -1,6 +1,7 @@
|
||||
import base64
|
||||
import collections
|
||||
import contextlib
|
||||
import glob
|
||||
import http.cookiejar
|
||||
import http.cookies
|
||||
import io
|
||||
@ -23,7 +24,8 @@ from .aes import (
|
||||
aes_gcm_decrypt_and_verify_bytes,
|
||||
unpad_pkcs7,
|
||||
)
|
||||
from .compat import functools
|
||||
from .compat import functools # isort: split
|
||||
from .compat import compat_os_name
|
||||
from .dependencies import (
|
||||
_SECRETSTORAGE_UNAVAILABLE_REASON,
|
||||
secretstorage,
|
||||
@ -31,6 +33,7 @@ from .dependencies import (
|
||||
)
|
||||
from .minicurses import MultilinePrinter, QuietMultilinePrinter
|
||||
from .utils import (
|
||||
DownloadError,
|
||||
Popen,
|
||||
error_to_str,
|
||||
expand_path,
|
||||
@ -122,13 +125,14 @@ def _extract_firefox_cookies(profile, container, logger):
|
||||
return YoutubeDLCookieJar()
|
||||
|
||||
if profile is None:
|
||||
search_root = _firefox_browser_dir()
|
||||
search_roots = list(_firefox_browser_dirs())
|
||||
elif _is_path(profile):
|
||||
search_root = profile
|
||||
search_roots = [profile]
|
||||
else:
|
||||
search_root = os.path.join(_firefox_browser_dir(), profile)
|
||||
search_roots = [os.path.join(path, profile) for path in _firefox_browser_dirs()]
|
||||
search_root = ', '.join(map(repr, search_roots))
|
||||
|
||||
cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
|
||||
cookie_database_path = _newest(_firefox_cookie_dbs(search_roots))
|
||||
if cookie_database_path is None:
|
||||
raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
|
||||
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
|
||||
@ -182,12 +186,21 @@ def _extract_firefox_cookies(profile, container, logger):
|
||||
cursor.connection.close()
|
||||
|
||||
|
||||
def _firefox_browser_dir():
|
||||
def _firefox_browser_dirs():
|
||||
if sys.platform in ('cygwin', 'win32'):
|
||||
return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
|
||||
yield os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
|
||||
|
||||
elif sys.platform == 'darwin':
|
||||
return os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
|
||||
return os.path.expanduser('~/.mozilla/firefox')
|
||||
yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
|
||||
|
||||
else:
|
||||
yield from map(os.path.expanduser, ('~/.mozilla/firefox', '~/snap/firefox/common/.mozilla/firefox'))
|
||||
|
||||
|
||||
def _firefox_cookie_dbs(roots):
|
||||
for root in map(os.path.abspath, roots):
|
||||
for pattern in ('', '*/', 'Profiles/*/'):
|
||||
yield from glob.iglob(os.path.join(root, pattern, 'cookies.sqlite'))
|
||||
|
||||
|
||||
def _get_chromium_based_browser_settings(browser_name):
|
||||
@ -268,7 +281,7 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
|
||||
logger.error(f'{browser_name} does not support profiles')
|
||||
search_root = config['browser_dir']
|
||||
|
||||
cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
|
||||
cookie_database_path = _newest(_find_files(search_root, 'Cookies', logger))
|
||||
if cookie_database_path is None:
|
||||
raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
|
||||
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
|
||||
@ -307,6 +320,12 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
|
||||
counts['unencrypted'] = unencrypted_cookies
|
||||
logger.debug(f'cookie version breakdown: {counts}')
|
||||
return jar
|
||||
except PermissionError as error:
|
||||
if compat_os_name == 'nt' and error.errno == 13:
|
||||
message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
|
||||
logger.error(message)
|
||||
raise DownloadError(message) # force exit
|
||||
raise
|
||||
finally:
|
||||
if cursor is not None:
|
||||
cursor.connection.close()
|
||||
@ -947,7 +966,7 @@ def _get_windows_v10_key(browser_root, logger):
|
||||
References:
|
||||
- [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
|
||||
"""
|
||||
path = _find_most_recently_used_file(browser_root, 'Local State', logger)
|
||||
path = _newest(_find_files(browser_root, 'Local State', logger))
|
||||
if path is None:
|
||||
logger.error('could not find local state file')
|
||||
return None
|
||||
@ -1049,17 +1068,20 @@ def _get_column_names(cursor, table_name):
|
||||
return [row[1].decode() for row in table_info]
|
||||
|
||||
|
||||
def _find_most_recently_used_file(root, filename, logger):
|
||||
def _newest(files):
|
||||
return max(files, key=lambda path: os.lstat(path).st_mtime, default=None)
|
||||
|
||||
|
||||
def _find_files(root, filename, logger):
|
||||
# if there are multiple browser profiles, take the most recently used one
|
||||
i, paths = 0, []
|
||||
i = 0
|
||||
with _create_progress_bar(logger) as progress_bar:
|
||||
for curr_root, dirs, files in os.walk(root):
|
||||
for curr_root, _, files in os.walk(root):
|
||||
for file in files:
|
||||
i += 1
|
||||
progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
|
||||
if file == filename:
|
||||
paths.append(os.path.join(curr_root, file))
|
||||
return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime)
|
||||
yield os.path.join(curr_root, file)
|
||||
|
||||
|
||||
def _merge_cookie_jars(jars):
|
||||
@ -1073,7 +1095,7 @@ def _merge_cookie_jars(jars):
|
||||
|
||||
|
||||
def _is_path(value):
|
||||
return os.path.sep in value
|
||||
return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep)
|
||||
|
||||
|
||||
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
|
||||
|
@ -369,7 +369,10 @@ class HlsFD(FragmentFD):
|
||||
|
||||
return output.getvalue().encode()
|
||||
|
||||
self.download_and_append_fragments(
|
||||
ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
|
||||
if len(fragments) == 1:
|
||||
self.download_and_append_fragments(ctx, fragments, info_dict)
|
||||
else:
|
||||
self.download_and_append_fragments(
|
||||
ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
|
||||
else:
|
||||
return self.download_and_append_fragments(ctx, fragments, info_dict)
|
||||
|
@ -47,7 +47,7 @@ from .acast import (
|
||||
ACastChannelIE,
|
||||
)
|
||||
from .acfun import AcFunVideoIE, AcFunBangumiIE
|
||||
from .adn import ADNIE
|
||||
from .adn import ADNIE, ADNSeasonIE
|
||||
from .adobeconnect import AdobeConnectIE
|
||||
from .adobetv import (
|
||||
AdobeTVEmbedIE,
|
||||
@ -93,6 +93,7 @@ from .alura import (
|
||||
AluraIE,
|
||||
AluraCourseIE
|
||||
)
|
||||
from .amadeustv import AmadeusTVIE
|
||||
from .amara import AmaraIE
|
||||
from .amcnetworks import AMCNetworksIE
|
||||
from .amazon import (
|
||||
@ -137,6 +138,10 @@ from .ard import (
|
||||
ARDMediathekCollectionIE,
|
||||
ARDIE,
|
||||
)
|
||||
from .art19 import (
|
||||
Art19IE,
|
||||
Art19ShowIE,
|
||||
)
|
||||
from .arte import (
|
||||
ArteTVIE,
|
||||
ArteTVEmbedIE,
|
||||
@ -144,6 +149,7 @@ from .arte import (
|
||||
ArteTVCategoryIE,
|
||||
)
|
||||
from .arnes import ArnesIE
|
||||
from .asobichannel import AsobiChannelIE, AsobiChannelTagURLIE
|
||||
from .atresplayer import AtresPlayerIE
|
||||
from .atscaleconf import AtScaleConfEventIE
|
||||
from .atvat import ATVAtIE
|
||||
@ -251,6 +257,7 @@ from .blogger import BloggerIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bongacams import BongaCamsIE
|
||||
from .boosty import BoostyIE
|
||||
from .bostonglobe import BostonGlobeIE
|
||||
from .box import BoxIE
|
||||
from .boxcast import BoxCastVideoIE
|
||||
@ -345,6 +352,10 @@ from .chingari import (
|
||||
ChingariIE,
|
||||
ChingariUserIE,
|
||||
)
|
||||
from .chzzk import (
|
||||
CHZZKLiveIE,
|
||||
CHZZKVideoIE,
|
||||
)
|
||||
from .cinemax import CinemaxIE
|
||||
from .cinetecamilano import CinetecaMilanoIE
|
||||
from .cineverse import (
|
||||
@ -363,6 +374,7 @@ from .clippit import ClippitIE
|
||||
from .cliprs import ClipRsIE
|
||||
from .closertotruth import CloserToTruthIE
|
||||
from .cloudflarestream import CloudflareStreamIE
|
||||
from .cloudycdn import CloudyCDNIE
|
||||
from .clubic import ClubicIE
|
||||
from .clyp import ClypIE
|
||||
from .cmt import CMTIE
|
||||
@ -540,6 +552,7 @@ from .egghead import (
|
||||
from .eighttracks import EightTracksIE
|
||||
from .einthusan import EinthusanIE
|
||||
from .eitb import EitbIE
|
||||
from .elementorembed import ElementorEmbedIE
|
||||
from .elonet import ElonetIE
|
||||
from .elpais import ElPaisIE
|
||||
from .eltrecetv import ElTreceTVIE
|
||||
@ -557,6 +570,7 @@ from .eroprofile import (
|
||||
EroProfileIE,
|
||||
EroProfileAlbumIE,
|
||||
)
|
||||
from .err import ERRJupiterIE
|
||||
from .ertgr import (
|
||||
ERTFlixCodenameIE,
|
||||
ERTFlixIE,
|
||||
@ -581,6 +595,7 @@ from .facebook import (
|
||||
FacebookPluginsVideoIE,
|
||||
FacebookRedirectURLIE,
|
||||
FacebookReelIE,
|
||||
FacebookAdsIE,
|
||||
)
|
||||
from .fancode import (
|
||||
FancodeVodIE,
|
||||
@ -603,6 +618,7 @@ from .filmon import (
|
||||
from .filmweb import FilmwebIE
|
||||
from .firsttv import FirstTVIE
|
||||
from .fivetv import FiveTVIE
|
||||
from .flextv import FlexTVIE
|
||||
from .flickr import FlickrIE
|
||||
from .floatplane import (
|
||||
FloatplaneIE,
|
||||
@ -680,6 +696,10 @@ from .genius import (
|
||||
GeniusIE,
|
||||
GeniusLyricsIE,
|
||||
)
|
||||
from .getcourseru import (
|
||||
GetCourseRuPlayerIE,
|
||||
GetCourseRuIE
|
||||
)
|
||||
from .gettr import (
|
||||
GettrIE,
|
||||
GettrStreamingIE,
|
||||
@ -787,6 +807,7 @@ from .iheart import (
|
||||
IHeartRadioIE,
|
||||
IHeartRadioPodcastIE,
|
||||
)
|
||||
from .ilpost import IlPostIE
|
||||
from .iltalehti import IltalehtiIE
|
||||
from .imdb import (
|
||||
ImdbIE,
|
||||
@ -899,6 +920,7 @@ from .koo import KooIE
|
||||
from .kth import KTHIE
|
||||
from .krasview import KrasViewIE
|
||||
from .ku6 import Ku6IE
|
||||
from .kukululive import KukuluLiveIE
|
||||
from .kusi import KUSIIE
|
||||
from .kuwo import (
|
||||
KuwoIE,
|
||||
@ -987,6 +1009,11 @@ from .lrt import (
|
||||
LRTVODIE,
|
||||
LRTStreamIE
|
||||
)
|
||||
from .lsm import (
|
||||
LSMLREmbedIE,
|
||||
LSMLTVEmbedIE,
|
||||
LSMReplayIE
|
||||
)
|
||||
from .lumni import (
|
||||
LumniIE
|
||||
)
|
||||
@ -996,7 +1023,7 @@ from .lynda import (
|
||||
)
|
||||
from .maariv import MaarivIE
|
||||
from .magellantv import MagellanTVIE
|
||||
from .magentamusik360 import MagentaMusik360IE
|
||||
from .magentamusik import MagentaMusikIE
|
||||
from .mailru import (
|
||||
MailRuIE,
|
||||
MailRuMusicIE,
|
||||
@ -1098,6 +1125,7 @@ from .motherless import (
|
||||
MotherlessIE,
|
||||
MotherlessGroupIE,
|
||||
MotherlessGalleryIE,
|
||||
MotherlessUploaderIE,
|
||||
)
|
||||
from .motorsport import MotorsportIE
|
||||
from .moviepilot import MoviepilotIE
|
||||
@ -1124,6 +1152,11 @@ from .musicdex import (
|
||||
MusicdexArtistIE,
|
||||
MusicdexPlaylistIE,
|
||||
)
|
||||
from .mx3 import (
|
||||
Mx3IE,
|
||||
Mx3NeoIE,
|
||||
Mx3VolksmusikIE,
|
||||
)
|
||||
from .mxplayer import (
|
||||
MxplayerIE,
|
||||
MxplayerShowIE,
|
||||
@ -1216,7 +1249,10 @@ from .nexx import (
|
||||
NexxIE,
|
||||
NexxEmbedIE,
|
||||
)
|
||||
from .nfb import NFBIE
|
||||
from .nfb import (
|
||||
NFBIE,
|
||||
NFBSeriesIE,
|
||||
)
|
||||
from .nfhsnetwork import NFHSNetworkIE
|
||||
from .nfl import (
|
||||
NFLIE,
|
||||
@ -1253,6 +1289,7 @@ from .niconico import (
|
||||
NicovideoTagURLIE,
|
||||
NiconicoLiveIE,
|
||||
)
|
||||
from .ninaprotocol import NinaProtocolIE
|
||||
from .ninecninemedia import (
|
||||
NineCNineMediaIE,
|
||||
CPTwentyFourIE,
|
||||
@ -1263,6 +1300,7 @@ from .niconicochannelplus import (
|
||||
NiconicoChannelPlusChannelLivesIE,
|
||||
)
|
||||
from .ninegag import NineGagIE
|
||||
from .ninenews import NineNewsIE
|
||||
from .ninenow import NineNowIE
|
||||
from .nintendo import NintendoIE
|
||||
from .nitter import NitterIE
|
||||
@ -1316,6 +1354,12 @@ from .nytimes import (
|
||||
NYTimesIE,
|
||||
NYTimesArticleIE,
|
||||
NYTimesCookingIE,
|
||||
NYTimesCookingRecipeIE,
|
||||
)
|
||||
from .nuum import (
|
||||
NuumLiveIE,
|
||||
NuumTabIE,
|
||||
NuumMediaIE,
|
||||
)
|
||||
from .nuvid import NuvidIE
|
||||
from .nzherald import NZHeraldIE
|
||||
@ -1358,6 +1402,7 @@ from .ora import OraTVIE
|
||||
from .orf import (
|
||||
ORFTVthekIE,
|
||||
ORFFM4StoryIE,
|
||||
ORFONIE,
|
||||
ORFRadioIE,
|
||||
ORFPodcastIE,
|
||||
ORFIPTVIE,
|
||||
@ -1482,7 +1527,7 @@ from .puhutv import (
|
||||
PuhuTVSerieIE,
|
||||
)
|
||||
from .pr0gramm import Pr0grammIE
|
||||
from .prankcast import PrankCastIE
|
||||
from .prankcast import PrankCastIE, PrankCastPostIE
|
||||
from .premiershiprugby import PremiershipRugbyIE
|
||||
from .presstv import PressTVIE
|
||||
from .projectveritas import ProjectVeritasIE
|
||||
@ -1579,6 +1624,7 @@ from .redbulltv import (
|
||||
RedBullIE,
|
||||
)
|
||||
from .reddit import RedditIE
|
||||
from .redge import RedCDNLivxIE
|
||||
from .redgifs import (
|
||||
RedGifsIE,
|
||||
RedGifsSearchIE,
|
||||
@ -1594,7 +1640,10 @@ from .restudy import RestudyIE
|
||||
from .reuters import ReutersIE
|
||||
from .reverbnation import ReverbNationIE
|
||||
from .rheinmaintv import RheinMainTVIE
|
||||
from .rinsefm import RinseFMIE
|
||||
from .rinsefm import (
|
||||
RinseFMIE,
|
||||
RinseFMArtistPlaylistIE,
|
||||
)
|
||||
from .rmcdecouverte import RMCDecouverteIE
|
||||
from .rockstargames import RockstarGamesIE
|
||||
from .rokfin import (
|
||||
@ -1710,6 +1759,7 @@ from .scte import (
|
||||
)
|
||||
from .scrolller import ScrolllerIE
|
||||
from .seeker import SeekerIE
|
||||
from .sejmpl import SejmIE
|
||||
from .senalcolombia import SenalColombiaLiveIE
|
||||
from .senategov import SenateISVPIE, SenateGovIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
@ -2002,6 +2052,7 @@ from .trovo import (
|
||||
TrovoChannelClipIE,
|
||||
)
|
||||
from .trtcocuk import TrtCocukVideoIE
|
||||
from .trtworld import TrtWorldIE
|
||||
from .trueid import TrueIDIE
|
||||
from .trunews import TruNewsIE
|
||||
from .truth import TruthIE
|
||||
@ -2019,7 +2070,6 @@ from .tunein import (
|
||||
TuneInPodcastEpisodeIE,
|
||||
TuneInShortenerIE,
|
||||
)
|
||||
from .turbo import TurboIE
|
||||
from .tv2 import (
|
||||
TV2IE,
|
||||
TV2ArticleIE,
|
||||
@ -2223,6 +2273,7 @@ from .viki import (
|
||||
VikiIE,
|
||||
VikiChannelIE,
|
||||
)
|
||||
from .viously import ViouslyIE
|
||||
from .viqeo import ViqeoIE
|
||||
from .viu import (
|
||||
ViuIE,
|
||||
@ -2271,11 +2322,6 @@ from .washingtonpost import (
|
||||
WashingtonPostIE,
|
||||
WashingtonPostArticleIE,
|
||||
)
|
||||
from .wasdtv import (
|
||||
WASDTVStreamIE,
|
||||
WASDTVRecordIE,
|
||||
WASDTVClipIE,
|
||||
)
|
||||
from .wat import WatIE
|
||||
from .wdr import (
|
||||
WDRIE,
|
||||
@ -2454,6 +2500,7 @@ from .zee5 import (
|
||||
Zee5SeriesIE,
|
||||
)
|
||||
from .zeenews import ZeeNewsIE
|
||||
from .zetland import ZetlandDKArticleIE
|
||||
from .zhihu import ZhihuIE
|
||||
from .zingmp3 import (
|
||||
ZingMp3IE,
|
||||
|
@ -92,6 +92,8 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
|
||||
|
||||
|
||||
class AbemaTVBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'abematv'
|
||||
|
||||
_USERTOKEN = None
|
||||
_DEVICE_ID = None
|
||||
_MEDIATOKEN = None
|
||||
@ -136,11 +138,15 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
if self._USERTOKEN:
|
||||
return self._USERTOKEN
|
||||
|
||||
add_opener(self._downloader, AbemaLicenseHandler(self))
|
||||
|
||||
username, _ = self._get_login_info()
|
||||
AbemaTVBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
|
||||
auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
|
||||
AbemaTVBaseIE._USERTOKEN = auth_cache and auth_cache.get('usertoken')
|
||||
if AbemaTVBaseIE._USERTOKEN:
|
||||
# try authentication with locally stored token
|
||||
try:
|
||||
AbemaTVBaseIE._DEVICE_ID = auth_cache.get('device_id')
|
||||
self._get_media_token(True)
|
||||
return
|
||||
except ExtractorError as e:
|
||||
@ -159,7 +165,6 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
})
|
||||
AbemaTVBaseIE._USERTOKEN = user_data['token']
|
||||
|
||||
add_opener(self._downloader, AbemaLicenseHandler(self))
|
||||
return self._USERTOKEN
|
||||
|
||||
def _get_media_token(self, invalidate=False, to_show=True):
|
||||
@ -181,6 +186,37 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
|
||||
return self._MEDIATOKEN
|
||||
|
||||
def _perform_login(self, username, password):
    """Log in to the ABEMA API and cache the resulting credentials.

    A device token is requested first. If a cache entry can be loaded for
    ``username`` (with ``min_ver='2024.01.19'``) and ``_get_media_token()``
    returns a truthy value, the login request is skipped. Otherwise the
    credentials are POSTed as JSON to the auth endpoint, the returned user
    token is stored on ``AbemaTVBaseIE``, the media token is refreshed and
    the device ID / user token pair is written to the cache.
    """
    self._get_device_token()
    cached_auth = self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
    if cached_auth and self._get_media_token():
        self.write_debug('Skipping logging in')
        return

    # Anything containing '@' is treated as an email address; it is
    # deliberately not validated any more strictly than that
    endpoint, id_field = (
        ('user/email', 'email') if '@' in username
        else ('oneTimePassword', 'userId'))

    payload = json.dumps({
        id_field: username,
        'password': password,
    }).encode('utf-8')
    request_headers = {
        'Authorization': f'bearer {self._get_device_token()}',
        'Origin': 'https://abema.tv',
        'Referer': 'https://abema.tv/',
        'Content-Type': 'application/json',
    }
    login_response = self._download_json(
        f'https://api.abema.io/v1/auth/{endpoint}', None,
        note='Logging in', data=payload, headers=request_headers)

    # The token is stored on the base class, not on the instance
    AbemaTVBaseIE._USERTOKEN = login_response['token']
    self._get_media_token(True)
    self.cache.store(self._NETRC_MACHINE, username, {
        'device_id': AbemaTVBaseIE._DEVICE_ID,
        'usertoken': AbemaTVBaseIE._USERTOKEN,
    })
|
||||
|
||||
def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
|
||||
return self._download_json(
|
||||
f'https://api.abema.io/{endpoint}', video_id, query=query or {},
|
||||
@ -204,7 +240,6 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
|
||||
class AbemaTVIE(AbemaTVBaseIE):
|
||||
_VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
|
||||
_NETRC_MACHINE = 'abematv'
|
||||
_TESTS = [{
|
||||
'url': 'https://abema.tv/video/episode/194-25_s2_p1',
|
||||
'info_dict': {
|
||||
@ -253,33 +288,6 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
}]
|
||||
_TIMETABLE = None
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._get_device_token()
|
||||
if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token():
|
||||
self.write_debug('Skipping logging in')
|
||||
return
|
||||
|
||||
if '@' in username: # don't strictly check if it's email address or not
|
||||
ep, method = 'user/email', 'email'
|
||||
else:
|
||||
ep, method = 'oneTimePassword', 'userId'
|
||||
|
||||
login_response = self._download_json(
|
||||
f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
|
||||
data=json.dumps({
|
||||
method: username,
|
||||
'password': password
|
||||
}).encode('utf-8'), headers={
|
||||
'Authorization': f'bearer {self._get_device_token()}',
|
||||
'Origin': 'https://abema.tv',
|
||||
'Referer': 'https://abema.tv/',
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
|
||||
AbemaTVBaseIE._USERTOKEN = login_response['token']
|
||||
self._get_media_token(True)
|
||||
self.cache.store(self._NETRC_MACHINE, username, AbemaTVBaseIE._USERTOKEN)
|
||||
|
||||
def _real_extract(self, url):
|
||||
# starting download using infojson from this extractor is undefined behavior,
|
||||
# and never be fixed in the future; you must trigger downloads by directly specifying URL.
|
||||
|
@ -3,6 +3,7 @@ import binascii
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
@ -17,17 +18,38 @@ from ..utils import (
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
long_to_bytes,
|
||||
parse_iso8601,
|
||||
pkcs1pad,
|
||||
strip_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ADNIE(InfoExtractor):
|
||||
class ADNBaseIE(InfoExtractor):
|
||||
IE_DESC = 'Animation Digital Network'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
||||
_NETRC_MACHINE = 'animationdigitalnetwork'
|
||||
_BASE = 'animationdigitalnetwork.fr'
|
||||
_API_BASE_URL = f'https://gw.api.{_BASE}/'
|
||||
_PLAYER_BASE_URL = f'{_API_BASE_URL}player/'
|
||||
_HEADERS = {}
|
||||
_LOGIN_ERR_MESSAGE = 'Unable to log in'
|
||||
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
|
||||
_POS_ALIGN_MAP = {
|
||||
'start': 1,
|
||||
'end': 3,
|
||||
}
|
||||
_LINE_ALIGN_MAP = {
|
||||
'middle': 8,
|
||||
'end': 4,
|
||||
}
|
||||
|
||||
|
||||
class ADNIE(ADNBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/[^/?#]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
|
||||
'md5': '1c9ef066ceb302c86f80c2b371615261',
|
||||
@ -44,29 +66,35 @@ class ADNIE(InfoExtractor):
|
||||
'season_number': 1,
|
||||
'episode': 'À ce soir !',
|
||||
'episode_number': 1,
|
||||
'thumbnail': str,
|
||||
'season': 'Season 1',
|
||||
},
|
||||
'skip': 'Only available in region (FR, ...)',
|
||||
'skip': 'Only available in French and German speaking Europe',
|
||||
}, {
|
||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://animationdigitalnetwork.de/video/the-eminence-in-shadow/23550-folge-1',
|
||||
'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
|
||||
'info_dict': {
|
||||
'id': '23550',
|
||||
'ext': 'mp4',
|
||||
'episode_number': 1,
|
||||
'duration': 1417,
|
||||
'release_date': '20231004',
|
||||
'series': 'The Eminence in Shadow',
|
||||
'season_number': 2,
|
||||
'episode': str,
|
||||
'title': str,
|
||||
'thumbnail': str,
|
||||
'season': 'Season 2',
|
||||
'comment_count': int,
|
||||
'average_rating': float,
|
||||
'description': str,
|
||||
},
|
||||
# 'skip': 'Only available in French and German speaking Europe',
|
||||
}]
|
||||
|
||||
_NETRC_MACHINE = 'animationdigitalnetwork'
|
||||
_BASE = 'animationdigitalnetwork.fr'
|
||||
_API_BASE_URL = 'https://gw.api.' + _BASE + '/'
|
||||
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
|
||||
_HEADERS = {}
|
||||
_LOGIN_ERR_MESSAGE = 'Unable to log in'
|
||||
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
|
||||
_POS_ALIGN_MAP = {
|
||||
'start': 1,
|
||||
'end': 3,
|
||||
}
|
||||
_LINE_ALIGN_MAP = {
|
||||
'middle': 8,
|
||||
'end': 4,
|
||||
}
|
||||
|
||||
def _get_subtitles(self, sub_url, video_id):
|
||||
if not sub_url:
|
||||
return None
|
||||
@ -116,6 +144,8 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
if sub_lang == 'vostf':
|
||||
sub_lang = 'fr'
|
||||
elif sub_lang == 'vostde':
|
||||
sub_lang = 'de'
|
||||
subtitles.setdefault(sub_lang, []).extend([{
|
||||
'ext': 'json',
|
||||
'data': json.dumps(sub),
|
||||
@ -147,7 +177,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
self.report_warning(message or self._LOGIN_ERR_MESSAGE)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
lang, video_id = self._match_valid_url(url).group('lang', 'id')
|
||||
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
|
||||
player = self._download_json(
|
||||
video_base_url + 'configuration', video_id,
|
||||
@ -157,12 +187,15 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
user = options['user']
|
||||
if not user.get('hasAccess'):
|
||||
self.raise_login_required()
|
||||
start_date = traverse_obj(options, ('video', 'startDate', {str}))
|
||||
if (parse_iso8601(start_date) or 0) > time.time():
|
||||
raise ExtractorError(f'This video is not available yet. Release date: {start_date}', expected=True)
|
||||
self.raise_login_required('This video requires a subscription', method='password')
|
||||
|
||||
token = self._download_json(
|
||||
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
|
||||
video_id, 'Downloading access token', headers={
|
||||
'x-player-refresh-token': user['refreshToken']
|
||||
'X-Player-Refresh-Token': user['refreshToken'],
|
||||
}, data=b'')['token']
|
||||
|
||||
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
|
||||
@ -184,7 +217,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
try:
|
||||
links_data = self._download_json(
|
||||
links_url, video_id, 'Downloading links JSON metadata', headers={
|
||||
'X-Player-Token': authorization
|
||||
'X-Player-Token': authorization,
|
||||
'X-Target-Distribution': lang,
|
||||
**self._HEADERS
|
||||
}, query={
|
||||
'freeWithAds': 'true',
|
||||
'adaptive': 'false',
|
||||
@ -232,8 +267,14 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
if format_id == 'vf':
|
||||
for f in m3u8_formats:
|
||||
f['language'] = 'fr'
|
||||
elif format_id == 'vde':
|
||||
for f in m3u8_formats:
|
||||
f['language'] = 'de'
|
||||
formats.extend(m3u8_formats)
|
||||
|
||||
if not formats:
|
||||
self.raise_login_required('This video requires a subscription', method='password')
|
||||
|
||||
video = (self._download_json(
|
||||
self._API_BASE_URL + 'video/%s' % video_id, video_id,
|
||||
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
|
||||
@ -255,3 +296,40 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
'average_rating': float_or_none(video.get('rating') or metas.get('rating')),
|
||||
'comment_count': int_or_none(video.get('commentsCount')),
|
||||
}
|
||||
|
||||
|
||||
# Playlist extractor for a show page on Animation Digital Network: resolves
# the show slug to its numeric ID and yields one ADNIE URL per listed episode.
class ADNSeasonIE(ADNBaseIE):
    _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/(?P<id>[^/?#]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'https://animationdigitalnetwork.fr/video/tokyo-mew-mew-new',
        'playlist_count': 12,
        'info_dict': {
            'id': '911',
            'title': 'Tokyo Mew Mew New',
        },
        # 'skip': 'Only available in French and German speaking Europe',
    }]

    def _real_extract(self, url):
        """Return a playlist result whose entries are the show's episode URLs."""
        url_match = self._match_valid_url(url)
        lang, video_show_slug = url_match.group('lang', 'id')

        show_json = self._download_json(
            f'{self._API_BASE_URL}show/{video_show_slug}/', video_show_slug,
            'Downloading show JSON metadata', headers=self._HEADERS)
        show = show_json['show']
        show_id = str(show['id'])

        # Entries in self._HEADERS are unpacked last, so they take precedence
        # over the per-request distribution header
        episode_headers = {
            'X-Target-Distribution': lang,
            **self._HEADERS
        }
        episodes = self._download_json(
            f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
            'Downloading episode list', headers=episode_headers,
            query={
                'order': 'asc',
                'limit': '-1',
            })

        def iter_episode_results():
            # Lazily turn every episode ID found in the listing into a URL result
            episode_ids = traverse_obj(episodes, ('videos', ..., 'id', {str_or_none}))
            for episode_id in episode_ids:
                episode_url = f'https://animationdigitalnetwork.{lang}/video/{video_show_slug}/{episode_id}'
                yield self.url_result(episode_url, ADNIE, episode_id)

        return self.playlist_result(iter_episode_results(), show_id, show.get('title'))
|
||||
|
@ -93,7 +93,7 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, theplatform_metadata['title'],
|
||||
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||
theplatform_metadata['ratings'][0]['rating'])
|
||||
traverse_obj(theplatform_metadata, ('ratings', 0, 'rating')))
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
info.update(self._extract_aen_smil(media_url, video_id, auth))
|
||||
|
77
yt_dlp/extractor/amadeustv.py
Normal file
77
yt_dlp/extractor/amadeustv.py
Normal file
@ -0,0 +1,77 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
# Extractor for amadeus.tv library pages: the page's Nuxt data supplies the
# real video ID, which is then looked up through the playvideo.qcloud.com
# "getplayinfo" endpoint to obtain the stream list.
class AmadeusTVIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?amadeus\.tv/library/(?P<id>[\da-f]+)'
    _TESTS = [{
        'url': 'http://www.amadeus.tv/library/65091a87ff85af59d9fc54c3',
        'info_dict': {
            'id': '5576678021301411311',
            'ext': 'mp4',
            'title': 'Jieon Park - 第五届珠海莫扎特国际青少年音乐周小提琴C组第三轮',
            'thumbnail': 'http://1253584441.vod2.myqcloud.com/a0046a27vodtransbj1253584441/7db4af535576678021301411311/coverBySnapshot_10_0.jpg',
            'duration': 1264.8,
            'upload_date': '20230918',
            'timestamp': 1695034800,
            'display_id': '65091a87ff85af59d9fc54c3',
            'view_count': int,
            'description': 'md5:a0357b9c215489e2067cbae0b777bb95',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for a library page.

        Raises ExtractorError when the Nuxt data does not contain a string
        video ID.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        nuxt_data = self._search_nuxt_data(webpage, display_id, traverse=('fetch', '0'))
        video_id = traverse_obj(nuxt_data, ('item', 'video', {str}))
        if not video_id:
            raise ExtractorError('Unable to extract actual video ID')

        referer = 'http://www.amadeus.tv/'
        video_data = self._download_json(
            f'http://playvideo.qcloud.com/getplayinfo/v2/1253584441/{video_id}',
            video_id, headers={'Referer': referer})

        # Candidate streams: the source video first, then each transcoded variant
        stream_path = ('videoInfo', ('sourceVideo', ('transcodeList', ...)), {dict})
        format_fields = {
            'url': 'url',
            'format_id': ('definition', {lambda x: f'http-{x or "0"}'}),
            'width': ('width', {int_or_none}),
            'height': ('height', {int_or_none}),
            'filesize': (('totalSize', 'size'), {int_or_none}),
            'vcodec': ('videoStreamList', 0, 'codec'),
            'acodec': ('audioStreamList', 0, 'codec'),
            'fps': ('videoStreamList', 0, 'fps', {float_or_none}),
        }
        # Streams without a valid URL are dropped; each format gets its own headers dict
        formats = [{
            **traverse_obj(stream, format_fields, get_all=False),
            'http_headers': {'Referer': referer},
        } for stream in traverse_obj(video_data, stream_path) if url_or_none(stream.get('url'))]

        api_metadata = traverse_obj(video_data, {
            'title': ('videoInfo', 'basicInfo', 'name', {str}),
            'thumbnail': ('coverInfo', 'coverUrl', {url_or_none}),
            'duration': ('videoInfo', 'sourceVideo', ('floatDuration', 'duration'), {float_or_none}),
        }, get_all=False)
        page_metadata = traverse_obj(nuxt_data, ('item', {
            'title': (('title', 'title_en', 'title_cn'), {str}),
            'description': (('description', 'description_en', 'description_cn'), {str}),
            'timestamp': ('date', {parse_iso8601}),
            'view_count': ('view', {int_or_none}),
        }), get_all=False)

        # Page metadata is unpacked last, so its values win over the API's on key clashes
        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            **api_metadata,
            **page_metadata,
        }
|
@ -78,14 +78,14 @@ class Ant1NewsGrArticleIE(AntennaBaseIE):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ant1news.gr/afieromata/article/549468/o-tzeims-mpont-sta-meteora-oi-apeiles-kai-o-xesikomos-ton-kalogeron',
|
||||
'md5': '294f18331bb516539d72d85a82887dcc',
|
||||
'md5': '57eb8d12181f0fa2b14b0b138e1de9b6',
|
||||
'info_dict': {
|
||||
'id': '_xvg/m_cmbatw=',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:a93e8ecf2e4073bfdffcb38f59945411',
|
||||
'timestamp': 1603092840,
|
||||
'upload_date': '20201019',
|
||||
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/756206d2-d640-40e2-b201-3555abdfc0db.jpg',
|
||||
'timestamp': 1666166520,
|
||||
'upload_date': '20221019',
|
||||
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/1920/756206d2-d640-40e2-b201-3555abdfc0db.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn',
|
||||
@ -117,7 +117,7 @@ class Ant1NewsGrEmbedIE(AntennaBaseIE):
|
||||
_BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player'
|
||||
_VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)'
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']
|
||||
_API_PATH = '/news/templates/data/jsonPlayer'
|
||||
_API_PATH = '/templates/data/jsonPlayer'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.antenna.gr/templates/pages/player?cid=3f_li_c_az_jw_y_u=&w=670&h=377',
|
||||
|
@ -4,9 +4,11 @@ from functools import partial
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
OnDemandPagedList,
|
||||
bug_reports_message,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
jwt_decode_hs256,
|
||||
make_archive_id,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
@ -233,17 +235,18 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:[^/]+/)?
|
||||
(?:player|live|video)/
|
||||
(?:(?P<display_id>[^?#]+)/)?
|
||||
(?:[^?#]+/)?
|
||||
(?P<id>[a-zA-Z0-9]+)
|
||||
/?(?:[?#]|$)'''
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
_TOKEN_URL = 'https://sso.ardmediathek.de/sso/token'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
|
||||
'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4',
|
||||
'info_dict': {
|
||||
'display_id': 'filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen',
|
||||
'id': 'Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
|
||||
'display_id': 'Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
|
||||
'id': '12939099',
|
||||
'title': 'Liebe auf vier Pfoten',
|
||||
'description': r're:^Claudia Schmitt, Anwältin in Salzburg',
|
||||
'duration': 5222,
|
||||
@ -255,7 +258,7 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'series': 'Filme im MDR',
|
||||
'age_limit': 0,
|
||||
'channel': 'MDR',
|
||||
'_old_archive_ids': ['ardbetamediathek 12939099'],
|
||||
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
|
||||
@ -276,37 +279,37 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'url': 'https://www.ardmediathek.de/video/tagesschau-oder-tagesschau-20-00-uhr/das-erste/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
|
||||
'md5': '1e73ded21cb79bac065117e80c81dc88',
|
||||
'info_dict': {
|
||||
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
|
||||
'id': '10049223',
|
||||
'ext': 'mp4',
|
||||
'title': 'tagesschau, 20:00 Uhr',
|
||||
'timestamp': 1636398000,
|
||||
'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b',
|
||||
'upload_date': '20211108',
|
||||
'display_id': 'tagesschau-oder-tagesschau-20-00-uhr/das-erste',
|
||||
'display_id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
|
||||
'duration': 915,
|
||||
'episode': 'tagesschau, 20:00 Uhr',
|
||||
'series': 'tagesschau',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678',
|
||||
'channel': 'ARD-Aktuell',
|
||||
'_old_archive_ids': ['ardbetamediathek 10049223'],
|
||||
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/7-tage/7-tage-unter-harten-jungs/hr-fernsehen/N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
|
||||
'md5': 'c428b9effff18ff624d4f903bda26315',
|
||||
'info_dict': {
|
||||
'id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
|
||||
'id': '94834686',
|
||||
'ext': 'mp4',
|
||||
'duration': 2700,
|
||||
'episode': '7 Tage ... unter harten Jungs',
|
||||
'description': 'md5:0f215470dcd2b02f59f4bd10c963f072',
|
||||
'upload_date': '20231005',
|
||||
'timestamp': 1696491171,
|
||||
'display_id': '7-tage/7-tage-unter-harten-jungs/hr-fernsehen',
|
||||
'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
|
||||
'series': '7 Tage ...',
|
||||
'channel': 'HR',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f6e6d5ffac41925c?w=960&ch=fa32ba69bc87989a',
|
||||
'title': '7 Tage ... unter harten Jungs',
|
||||
'_old_archive_ids': ['ardbetamediathek 94834686'],
|
||||
'_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
@ -357,13 +360,39 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
}), get_all=False)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
|
||||
display_id = self._match_id(url)
|
||||
query = {'embedded': 'false', 'mcV6': 'true'}
|
||||
headers = {}
|
||||
|
||||
if self._get_cookies(self._TOKEN_URL).get('ams'):
|
||||
token = self._download_json(
|
||||
self._TOKEN_URL, display_id, 'Fetching token for age verification',
|
||||
'Unable to fetch age verification token', fatal=False)
|
||||
id_token = traverse_obj(token, ('idToken', {str}))
|
||||
decoded_token = traverse_obj(id_token, ({jwt_decode_hs256}, {dict}))
|
||||
user_id = traverse_obj(decoded_token, (('user_id', 'sub'), {str}), get_all=False)
|
||||
if not user_id:
|
||||
self.report_warning('Unable to extract token, continuing without authentication')
|
||||
else:
|
||||
headers['x-authorization'] = f'Bearer {id_token}'
|
||||
query['userId'] = user_id
|
||||
if decoded_token.get('age_rating') != 18:
|
||||
self.report_warning('Account is not verified as 18+; video may be unavailable')
|
||||
|
||||
page_data = self._download_json(
|
||||
f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{video_id}', video_id, query={
|
||||
'embedded': 'false',
|
||||
'mcV6': 'true',
|
||||
})
|
||||
f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}',
|
||||
display_id, query=query, headers=headers)
|
||||
|
||||
# For user convenience we use the old contentId instead of the longer crid
|
||||
# Ref: https://github.com/yt-dlp/yt-dlp/issues/8731#issuecomment-1874398283
|
||||
old_id = traverse_obj(page_data, ('tracking', 'atiCustomVars', 'contentId', {int}))
|
||||
if old_id is not None:
|
||||
video_id = str(old_id)
|
||||
archive_ids = [make_archive_id(ARDBetaMediathekIE, display_id)]
|
||||
else:
|
||||
self.report_warning(f'Could not extract contentId{bug_reports_message()}')
|
||||
video_id = display_id
|
||||
archive_ids = None
|
||||
|
||||
player_data = traverse_obj(
|
||||
page_data, ('widgets', lambda _, v: v['type'] in ('player_ondemand', 'player_live'), {dict}), get_all=False)
|
||||
@ -371,7 +400,7 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
media_data = traverse_obj(player_data, ('mediaCollection', 'embedded', {dict}))
|
||||
|
||||
if player_data.get('blockedByFsk'):
|
||||
self.raise_no_formats('This video is only available after 22:00', expected=True)
|
||||
self.raise_login_required('This video is only available for age verified users or after 22:00')
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
@ -419,8 +448,6 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
})
|
||||
|
||||
age_limit = traverse_obj(page_data, ('fskRating', {lambda x: remove_start(x, 'FSK')}, {int_or_none}))
|
||||
old_id = traverse_obj(page_data, ('tracking', 'atiCustomVars', 'contentId'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
@ -438,7 +465,7 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'channel': 'clipSourceName',
|
||||
})),
|
||||
**self._extract_episode_info(page_data.get('title')),
|
||||
'_old_archive_ids': [make_archive_id(ARDBetaMediathekIE, old_id)],
|
||||
'_old_archive_ids': archive_ids,
|
||||
}
|
||||
|
||||
|
||||
|
303
yt_dlp/extractor/art19.py
Normal file
303
yt_dlp/extractor/art19.py
Normal file
@ -0,0 +1,303 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none, int_or_none, parse_iso8601, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
# Extractor for single ART19 podcast episodes, addressed either through the
# art19.com show/episode page or the rss.art19.com direct MP3 URL. Embedded
# players are detected via <iframe> tags and "art19-web-player" <div> tags.
class Art19IE(InfoExtractor):
    _UUID_REGEX = r'[\da-f]{8}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{12}'
    _VALID_URL = [
        rf'https?://(?:www\.)?art19\.com/shows/[^/#?]+/episodes/(?P<id>{_UUID_REGEX})',
        rf'https?://rss\.art19\.com/episodes/(?P<id>{_UUID_REGEX})\.mp3',
    ]
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL[0]})']

    _TESTS = [{
        'url': 'https://rss.art19.com/episodes/5ba1413c-48b8-472b-9cc3-cfd952340bdb.mp3',
        'info_dict': {
            'id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
            'ext': 'mp3',
            'title': 'Why Did DeSantis Drop Out?',
            'series': 'The Daily Briefing',
            'release_timestamp': 1705941275,
            'description': 'md5:da38961da4a3f7e419471365e3c6b49f',
            'episode': 'Episode 582',
            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
            'series_id': 'ed52a0ab-08b1-4def-8afc-549e4d93296d',
            'upload_date': '20240122',
            'timestamp': 1705940815,
            'episode_number': 582,
            'modified_date': '20240122',
            'episode_id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
            'modified_timestamp': 1705941275,
            'release_date': '20240122',
            'duration': 527.4,
        },
    }, {
        'url': 'https://art19.com/shows/scamfluencers/episodes/8319b776-4153-4d22-8630-631f204a03dd',
        'info_dict': {
            'id': '8319b776-4153-4d22-8630-631f204a03dd',
            'ext': 'mp3',
            'title': 'Martha Stewart: The Homemaker Hustler Part 2',
            'modified_date': '20240116',
            'upload_date': '20240105',
            'modified_timestamp': 1705435802,
            'episode_id': '8319b776-4153-4d22-8630-631f204a03dd',
            'series_id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
            'description': 'md5:4aa7cfd1358dc57e729835bc208d7893',
            'release_timestamp': 1705305660,
            'release_date': '20240115',
            'timestamp': 1704481536,
            'episode_number': 88,
            'series': 'Scamfluencers',
            'duration': 2588.37501,
            'episode': 'Episode 88',
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.nu.nl/formule-1/6291456/verstappen-wordt-een-synoniem-voor-formule-1.html',
        'info_dict': {
            'id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
            'ext': 'mp3',
            'title': "'Verstappen wordt een synoniem voor Formule 1'",
            'season': 'Seizoen 6',
            'description': 'md5:39a7159a31c4cda312b2e893bdd5c071',
            'episode_id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
            'duration': 3061.82111,
            'series_id': '93f4e113-2a60-4609-a564-755058fa40d8',
            'release_date': '20231126',
            'modified_timestamp': 1701156004,
            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
            'season_number': 6,
            'episode_number': 52,
            'modified_date': '20231128',
            'upload_date': '20231126',
            'timestamp': 1701025981,
            'season_id': '36097c1e-7455-490d-a2fe-e2f10b4d5f26',
            'series': 'De Boordradio',
            'release_timestamp': 1701026308,
            'episode': 'Episode 52',
        },
    }, {
        'url': 'https://www.wishtv.com/podcast-episode/larry-bucshon-announces-retirement-from-congress/',
        'info_dict': {
            'id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
            'ext': 'mp3',
            'title': 'Larry Bucshon announces retirement from congress',
            'upload_date': '20240115',
            'episode_number': 148,
            'episode': 'Episode 148',
            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
            'release_date': '20240115',
            'timestamp': 1705328205,
            'release_timestamp': 1705329275,
            'series': 'All INdiana Politics',
            'modified_date': '20240117',
            'modified_timestamp': 1705458901,
            'series_id': 'c4af6c27-b10f-4ff2-9f84-0f407df86ff1',
            'episode_id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
            'description': 'md5:53b5239e4d14973a87125c217c255b2a',
            'duration': 1256.18848,
        },
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield embed URLs: the inherited matches first, then web-player divs."""
        yield from super()._extract_embed_urls(url, webpage)
        # The web player is a <div> carrying the episode UUID in a data attribute
        player_div_re = rf'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-episode-id=[\'"]({cls._UUID_REGEX})[\'"]'
        for player_div in re.finditer(player_div_re, webpage):
            yield f'https://rss.art19.com/episodes/{player_div.group(1)}.mp3'

    def _real_extract(self, url):
        """Build the info dict for one episode.

        Both metadata downloads are non-fatal; the direct MP3 URL is always
        offered as a format regardless of whether they succeed.
        """
        episode_id = self._match_id(url)

        player_metadata = self._download_json(
            f'https://art19.com/episodes/{episode_id}', episode_id,
            note='Downloading player metadata', fatal=False,
            headers={'Accept': 'application/vnd.art19.v0+json'})
        rss_metadata = self._download_json(
            f'https://rss.art19.com/episodes/{episode_id}.json', episode_id,
            note='Downloading RSS metadata', fatal=False)

        formats = [{
            'format_id': 'direct',
            'url': f'https://rss.art19.com/episodes/{episode_id}.mp3',
            'vcodec': 'none',
            'acodec': 'mp3',
        }]
        media_items = traverse_obj(rss_metadata, ('content', 'media', {dict.items}, ...))
        for media_key, media_info in media_items:
            # 'waveform_bin' entries are never offered as formats
            media_url = None
            if media_key != 'waveform_bin':
                media_url = traverse_obj(media_info, ('url', {url_or_none}))
            if media_url:
                formats.append({
                    'format_id': media_key,
                    'url': media_url,
                    'vcodec': 'none',
                    'acodec': media_key,
                    'quality': -2 if media_key == 'ogg' else -1,
                })

        player_fields = traverse_obj(player_metadata, ('episode', {
            'title': ('title', {str}),
            'description': ('description_plain', {str}),
            'episode_id': ('id', {str}),
            'episode_number': ('episode_number', {int_or_none}),
            'season_id': ('season_id', {str}),
            'series_id': ('series_id', {str}),
            'timestamp': ('created_at', {parse_iso8601}),
            'release_timestamp': ('released_at', {parse_iso8601}),
            'modified_timestamp': ('updated_at', {parse_iso8601}),
        }))
        rss_fields = traverse_obj(rss_metadata, ('content', {
            'title': ('episode_title', {str}),
            'description': ('episode_description_plain', {str}),
            'episode_id': ('episode_id', {str}),
            'episode_number': ('episode_number', {int_or_none}),
            'season': ('season_title', {str}),
            'season_id': ('season_id', {str}),
            'season_number': ('season_number', {int_or_none}),
            'series': ('series_title', {str}),
            'series_id': ('series_id', {str}),
            'thumbnail': ('cover_image', {url_or_none}),
            'duration': ('duration', {float_or_none}),
        }))

        # RSS fields are unpacked last, so they override player fields on key clashes
        return {
            'id': episode_id,
            'formats': formats,
            **player_fields,
            **rss_fields,
        }
|
||||
|
||||
|
||||
class Art19ShowIE(InfoExtractor):
    """Playlist extractor for art19.com shows (series).

    Handles ``art19.com/shows/<id-or-slug>`` pages (optionally with ``/embed``)
    and ``rss.art19.com/<slug>`` feeds, and yields one entry per episode ID
    listed in the series metadata.
    """

    _VALID_URL_BASE = r'https?://(?:www\.)?art19\.com/shows/(?P<id>[\w-]+)(?:/embed)?/?'
    _VALID_URL = [
        rf'{_VALID_URL_BASE}(?:$|[#?])',
        r'https?://rss\.art19\.com/(?P<id>[\w-]+)/?(?:$|[#?])',
    ]
    # NOTE(review): the trailing [^\'"] consumes exactly one character after the
    # base URL -- confirm whether a quantifier was intended here.
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL_BASE}[^\'"])']

    _TESTS = [{
        'url': 'https://www.art19.com/shows/5898c087-a14f-48dc-b6fc-a2280a1ff6e0/',
        'info_dict': {
            '_type': 'playlist',
            'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
            'display_id': 'echt-gebeurd',
            'title': 'Echt Gebeurd',
            'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
            'timestamp': 1492642167,
            'upload_date': '20170419',
            'modified_timestamp': int,
            'modified_date': str,
            'tags': 'count:7',
        },
        'playlist_mincount': 425,
    }, {
        'url': 'https://www.art19.com/shows/echt-gebeurd',
        'info_dict': {
            '_type': 'playlist',
            'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
            'display_id': 'echt-gebeurd',
            'title': 'Echt Gebeurd',
            'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
            'timestamp': 1492642167,
            'upload_date': '20170419',
            'modified_timestamp': int,
            'modified_date': str,
            'tags': 'count:7',
        },
        'playlist_mincount': 425,
    }, {
        'url': 'https://rss.art19.com/scamfluencers',
        'info_dict': {
            '_type': 'playlist',
            'id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
            'display_id': 'scamfluencers',
            'title': 'Scamfluencers',
            'description': 'md5:7d239d670c0ced6dadbf71c4caf764b7',
            'timestamp': 1647368573,
            'upload_date': '20220315',
            'modified_timestamp': int,
            'modified_date': str,
            'tags': [],
        },
        'playlist_mincount': 90,
    }, {
        'url': 'https://art19.com/shows/enthuellt/embed',
        'info_dict': {
            '_type': 'playlist',
            'id': 'e2cacf57-bb8a-4263-aa81-719bcdd4f80c',
            'display_id': 'enthuellt',
            'title': 'Enthüllt',
            'description': 'md5:17752246643414a2fd51744fc9a1c08e',
            'timestamp': 1601645860,
            'upload_date': '20201002',
            'modified_timestamp': int,
            'modified_date': str,
            'tags': 'count:10',
        },
        'playlist_mincount': 10,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://deconstructingyourself.com/deconstructing-yourself-podcast',
        'info_dict': {
            '_type': 'playlist',
            'id': 'cfbb9b01-c295-4adb-8726-adde7c03cf21',
            'display_id': 'deconstructing-yourself',
            'title': 'Deconstructing Yourself',
            'description': 'md5:dab5082b28b248a35476abf64768854d',
            'timestamp': 1570581181,
            'upload_date': '20191009',
            'modified_timestamp': int,
            'modified_date': str,
            'tags': 'count:5',
        },
        'playlist_mincount': 80,
    }, {
        'url': 'https://chicagoreader.com/columns-opinion/podcasts/ben-joravsky-show-podcast-episodes/',
        'info_dict': {
            '_type': 'playlist',
            'id': '9dfa2c37-ab87-4c13-8388-4897914313ec',
            'display_id': 'the-ben-joravsky-show',
            'title': 'The Ben Joravsky Show',
            'description': 'md5:c0f3ec0ee0dbea764390e521adc8780a',
            'timestamp': 1550875095,
            'upload_date': '20190222',
            'modified_timestamp': int,
            'modified_date': str,
            'tags': ['Chicago Politics', 'chicago', 'Ben Joravsky'],
        },
        'playlist_mincount': 1900,
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield show URLs embedded in *webpage*.

        First yields whatever the parent implementation finds, then one show
        URL for every ``art19-web-player`` <div> carrying a ``data-series-id``.
        """
        yield from super()._extract_embed_urls(url, webpage)

        player_div_re = (
            r'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-series-id=[\'"]([\w-]+)[\'"]')
        for player_div in re.finditer(player_div_re, webpage):
            series_id = player_div.group(1)
            yield f'https://art19.com/shows/{series_id}'

    def _real_extract(self, url):
        """Download the series metadata and return it as a playlist of episode URLs."""
        series_id = self._match_id(url)
        api_headers = {'Accept': 'application/vnd.art19.v0+json'}
        series_metadata = self._download_json(
            f'https://art19.com/series/{series_id}', series_id,
            headers=api_headers, note='Downloading series metadata')

        # Each episode is delegated to Art19IE through its rss.art19.com MP3 URL.
        entries = []
        for episode_id in traverse_obj(series_metadata, ('series', 'episode_ids', ..., {str})):
            entries.append(
                self.url_result(f'https://rss.art19.com/episodes/{episode_id}.mp3', Art19IE))

        series_info = traverse_obj(series_metadata, ('series', {
            'id': ('id', {str}),
            'display_id': ('slug', {str}),
            'title': ('title', {str}),
            'description': ('description_plain', {str}),
            'timestamp': ('created_at', {parse_iso8601}),
            'modified_timestamp': ('updated_at', {parse_iso8601}),
        }))
        tag_names = traverse_obj(series_metadata, ('tags', ..., 'name', {str}))

        return {
            '_type': 'playlist',
            'entries': entries,
            **series_info,
            'tags': tag_names,
        }
|
@ -70,7 +70,24 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/q82dTTfyuCXupPsGxXsd7B/940x530',
|
||||
'upload_date': '20230930',
|
||||
'ext': 'mp4',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/de/videos/085374-003-A/im-hohen-norden-geboren/',
|
||||
'info_dict': {
|
||||
'id': '085374-003-A',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:ab79ec7cc472a93164415b4e4916abf9',
|
||||
'timestamp': 1702872000,
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/TnyHBfPxv3v2GEY3suXGZP/940x530',
|
||||
'duration': 2594,
|
||||
'title': 'Die kurze Zeit der Jugend',
|
||||
'alt_title': 'Im hohen Norden geboren',
|
||||
'upload_date': '20231218',
|
||||
'subtitles': {
|
||||
'fr': 'mincount:1',
|
||||
'fr-acc': 'mincount:1',
|
||||
},
|
||||
},
|
||||
}]
|
||||
|
||||
_GEO_BYPASS = True
|
||||
@ -121,6 +138,16 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
),
|
||||
}
|
||||
|
||||
@staticmethod
def _fix_accessible_subs_locale(subs):
    """Re-key subtitle tracks whose URL ends with ``-MAL.m3u8`` under ``<lang>-acc``.

    @param subs    Mapping of language code -> list of subtitle format dicts.
    @returns       A new mapping of the same shape. Tracks whose ``url`` ends
                   with ``-MAL.m3u8`` are filed under ``f'{lang}-acc'``; every
                   other track keeps its original language key.
    """
    updated_subs = {}
    for lang, sub_formats in subs.items():
        for sub_format in sub_formats:
            # Derive the key per track: mutating `lang` here would leak the
            # '-acc' suffix onto every later track of the same language
            # (and stack it, e.g. 'fr-acc-acc').
            sub_url = sub_format.get('url') or ''
            target_lang = f'{lang}-acc' if sub_url.endswith('-MAL.m3u8') else lang
            updated_subs.setdefault(target_lang, []).append(sub_format)
    return updated_subs
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
@ -174,6 +201,7 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
secondary_formats.extend(fmts)
|
||||
else:
|
||||
formats.extend(fmts)
|
||||
subs = self._fix_accessible_subs_locale(subs)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
elif stream['protocol'] in ('HTTPS', 'RTMP'):
|
||||
|
168
yt_dlp/extractor/asobichannel.py
Normal file
168
yt_dlp/extractor/asobichannel.py
Normal file
@ -0,0 +1,168 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AsobiChannelBaseIE(InfoExtractor):
    """Shared helpers for the ASOBI CHANNEL extractors."""

    # Header sent with requests to the channel.microcms.io API
    _MICROCMS_HEADER = {'X-MICROCMS-API-KEY': 'qRaKehul9AHU8KtL0dnq1OCLKnFec6yrbcz3'}

    def _extract_info(self, metadata):
        """Map a microCMS media object onto info-dict fields.

        Only fields that are present and pass their type filter are returned.
        """
        field_paths = {
            'id': ('id', {str}),
            'title': ('title', {str}),
            'description': ('body', {clean_html}),
            'thumbnail': ('contents', 'video_thumb', 'url', {url_or_none}),
            'timestamp': ('publishedAt', {parse_iso8601}),
            'modified_timestamp': ('updatedAt', {parse_iso8601}),
            'channel': ('channel', 'name', {str}),
            'channel_id': ('channel', 'id', {str}),
        }
        return traverse_obj(metadata, field_paths)
|
||||
|
||||
|
||||
class AsobiChannelIE(AsobiChannelBaseIE):
    """Extractor for single ASOBI CHANNEL watch pages (VOD and live events)."""

    IE_NAME = 'asobichannel'
    IE_DESC = 'ASOBI CHANNEL'

    _VALID_URL = r'https?://asobichannel\.asobistore\.jp/watch/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://asobichannel.asobistore.jp/watch/1ypp48qd32p',
        'md5': '39df74e872afe032c4eb27b89144fc92',
        'info_dict': {
            'id': '1ypp48qd32p',
            'ext': 'mp4',
            'title': 'アイドルマスター ミリオンライブ! 765プロch 原っぱ通信 #1',
            'description': 'md5:b930bd2199c9b2fd75951ce4aaa7efd2',
            'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/a8e6f84119f54eb9ab4ce16729239905/%E3%82%B5%E3%83%A0%E3%83%8D%20(1).png',
            'timestamp': 1697098247,
            'upload_date': '20231012',
            'modified_timestamp': 1698381162,
            'modified_date': '20231027',
            'channel': 'アイドルマスター',
            'channel_id': 'idolmaster',
        },
    }, {
        'url': 'https://asobichannel.asobistore.jp/watch/redigiwnjzqj',
        'md5': '229fa8fb5c591c75ce8c37a497f113f6',
        'info_dict': {
            'id': 'redigiwnjzqj',
            'ext': 'mp4',
            'title': '【おまけ放送】アイドルマスター ミリオンライブ! 765プロch 原っぱ通信 #1',
            'description': 'md5:7d9cd35fb54425a6967822bd564ea2d9',
            'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/20e5c1d6184242eebc2512a5dec59bf0/P1_%E5%8E%9F%E3%81%A3%E3%81%B1%E3%82%B5%E3%83%A0%E3%83%8D.png',
            'modified_timestamp': 1697797125,
            'modified_date': '20231020',
            'timestamp': 1697261769,
            'upload_date': '20231014',
            'channel': 'アイドルマスター',
            'channel_id': 'idolmaster',
        },
    }]

    # Authorization header for survapi.channel.or.jp; filled in by _real_initialize
    _survapi_header = None

    def _real_initialize(self):
        """Fetch an API token and build the bearer header used for survapi requests."""
        token = self._download_json(
            'https://asobichannel-api.asobistore.jp/api/v1/vspf/token', None,
            note='Retrieving API token')
        self._survapi_header = {'Authorization': f'Bearer {token}'}

    def _process_vod(self, video_id, metadata):
        """Return the formats of a VOD item.

        Looks up the content referenced by ``metadata['contents']['video_id']``
        and extracts HLS formats from its ``ex_content.streaming_url``.
        """
        content_id = metadata['contents']['video_id']

        vod_data = self._download_json(
            f'https://survapi.channel.or.jp/proxy/v1/contents/{content_id}/get_by_cuid', video_id,
            headers=self._survapi_header, note='Downloading vod data')

        return {
            'formats': self._extract_m3u8_formats(vod_data['ex_content']['streaming_url'], video_id),
        }

    def _process_live(self, video_id, metadata):
        """Return live status, formats and release timestamp of a live event.

        The event's ``Player_type`` decides the outcome: ``poster`` is reported
        as an upcoming event without formats, ``player`` is extracted as a
        running live stream.

        Raises ExtractorError for any other player type.
        """
        content_id = metadata['contents']['video_id']
        event_data = self._download_json(
            f'https://survapi.channel.or.jp/ex/events/{content_id}?embed=channel', video_id,
            headers=self._survapi_header, note='Downloading event data')

        player_type = traverse_obj(event_data, ('data', 'Player_type', {str}))
        if player_type == 'poster':
            self.raise_no_formats('Live event has not yet started', expected=True)
            live_status = 'is_upcoming'
            formats = []
        elif player_type == 'player':
            live_status = 'is_live'
            formats = self._extract_m3u8_formats(
                event_data['data']['Channel']['Custom_live_url'], video_id, live=True)
        else:
            # Must be an f-string, otherwise the placeholder is shown verbatim
            raise ExtractorError(f'Unsupported player type {player_type!r}')

        return {
            'release_timestamp': traverse_obj(metadata, ('period', 'start', {parse_iso8601})),
            'live_status': live_status,
            'formats': formats,
        }

    def _real_extract(self, url):
        """Download the media metadata and dispatch on its video type (VOD or LIVE).

        Raises ExtractorError when the video type is neither of the two.
        """
        video_id = self._match_id(url)

        metadata = self._download_json(
            f'https://channel.microcms.io/api/v1/media/{video_id}', video_id,
            headers=self._MICROCMS_HEADER)

        info = self._extract_info(metadata)

        video_type = traverse_obj(metadata, ('contents', 'video_type', 0, {str}))
        if video_type == 'VOD':
            return merge_dicts(info, self._process_vod(video_id, metadata))
        if video_type == 'LIVE':
            return merge_dicts(info, self._process_live(video_id, metadata))

        raise ExtractorError(f'Unexpected video type {video_type!r}')
|
||||
|
||||
|
||||
class AsobiChannelTagURLIE(AsobiChannelBaseIE):
    """Playlist extractor for ASOBI CHANNEL tag pages."""

    IE_NAME = 'asobichannel:tag'
    IE_DESC = 'ASOBI CHANNEL'

    _VALID_URL = r'https?://asobichannel\.asobistore\.jp/tag/(?P<id>[a-z0-9-_]+)'
    _TESTS = [{
        'url': 'https://asobichannel.asobistore.jp/tag/bjhh-nbcja',
        'info_dict': {
            'id': 'bjhh-nbcja',
            'title': 'アイドルマスター ミリオンライブ! 765プロch 原っぱ通信',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://asobichannel.asobistore.jp/tag/hvm5qw3c6od',
        'info_dict': {
            'id': 'hvm5qw3c6od',
            'title': 'アイマスMOIW2023ラジオ',
        },
        'playlist_mincount': 13,
    }]

    def _real_extract(self, url):
        """Return a playlist of all media items carrying the tag in *url*.

        The playlist title is read from the page's Next.js data when available;
        the entries come from the microCMS media listing filtered by the tag.
        """
        tag_id = self._match_id(url)
        webpage = self._download_webpage(url, tag_id)

        # Title lookup is best-effort (fatal=False)
        nextjs_data = self._search_nextjs_data(webpage, tag_id, fatal=False)
        title = traverse_obj(nextjs_data, ('props', 'pageProps', 'data', 'name', {str}))

        media = self._download_json(
            f'https://channel.microcms.io/api/v1/media?limit=999&filters=(tag[contains]{tag_id})',
            tag_id, headers=self._MICROCMS_HEADER)

        def entries():
            # Only items that have a truthy 'id' are turned into entries
            items_with_id = traverse_obj(media, ('contents', lambda _, v: v['id']))
            for metadata in items_with_id:
                entry = {
                    '_type': 'url',
                    'url': f'https://asobichannel.asobistore.jp/watch/{metadata["id"]}',
                    'ie_key': AsobiChannelIE.ie_key(),
                }
                entry.update(self._extract_info(metadata))
                yield entry

        return self.playlist_result(entries(), tag_id, title)
|
@ -7,6 +7,7 @@ import math
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from ..dependencies import Cryptodome
|
||||
@ -18,6 +19,7 @@ from ..utils import (
|
||||
OnDemandPagedList,
|
||||
bool_or_none,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
format_field,
|
||||
@ -1303,6 +1305,26 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
'upload_date': '20211127',
|
||||
},
|
||||
'playlist_mincount': 513,
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
|
||||
'info_dict': {
|
||||
'id': 'BV1DU4y1r7tz',
|
||||
'ext': 'mp4',
|
||||
'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
|
||||
'upload_date': '20220820',
|
||||
'description': '',
|
||||
'timestamp': 1661016330,
|
||||
'uploader_id': '1958703906',
|
||||
'uploader': '靡烟miya',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'duration': 9552.903,
|
||||
'tags': list,
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'_old_archive_ids': ['bilibili 687146339_part1'],
|
||||
},
|
||||
'params': {'noplaylist': True},
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
|
||||
'info_dict': {
|
||||
@ -1354,6 +1376,11 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
|
||||
bvid = traverse_obj(parse_qs(url), ('bvid', 0))
|
||||
if not self._yes_playlist(list_id, bvid):
|
||||
return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)
|
||||
|
||||
webpage = self._download_webpage(url, list_id)
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
|
||||
if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
|
||||
@ -1463,8 +1490,37 @@ class BiliBiliSearchIE(SearchInfoExtractor):
|
||||
IE_DESC = 'Bilibili video search'
|
||||
_MAX_RESULTS = 100000
|
||||
_SEARCH_KEY = 'bilisearch'
|
||||
_TESTS = [{
|
||||
'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
|
||||
'playlist_count': 3,
|
||||
'info_dict': {
|
||||
'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
|
||||
'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'BV1n44y1Q7sc',
|
||||
'ext': 'mp4',
|
||||
'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】',
|
||||
'timestamp': 1669889987,
|
||||
'upload_date': '20221201',
|
||||
'description': 'md5:43343c0973defff527b5a4b403b4abf9',
|
||||
'tags': list,
|
||||
'uploader': '靡烟miya',
|
||||
'duration': 123.156,
|
||||
'uploader_id': '1958703906',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 988222410_part1'],
|
||||
},
|
||||
}],
|
||||
}]
|
||||
|
||||
def _search_results(self, query):
|
||||
if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
|
||||
self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
|
||||
for page_num in itertools.count(1):
|
||||
videos = self._download_json(
|
||||
'https://api.bilibili.com/x/web-interface/search/type', query,
|
||||
@ -1621,6 +1677,7 @@ class BiliBiliPlayerIE(InfoExtractor):
|
||||
class BiliIntlBaseIE(InfoExtractor):
|
||||
_API_URL = 'https://api.bilibili.tv/intl/gateway'
|
||||
_NETRC_MACHINE = 'biliintl'
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
|
||||
def _call_api(self, endpoint, *args, **kwargs):
|
||||
json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
|
||||
@ -1658,19 +1715,34 @@ class BiliIntlBaseIE(InfoExtractor):
|
||||
'aid': aid,
|
||||
})) or {}
|
||||
subtitles = {}
|
||||
for sub in sub_json.get('subtitles') or []:
|
||||
sub_url = sub.get('url')
|
||||
if not sub_url:
|
||||
continue
|
||||
sub_data = self._download_json(
|
||||
sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False,
|
||||
note='Downloading subtitles%s' % f' for {sub["lang"]}' if sub.get('lang') else '')
|
||||
if not sub_data:
|
||||
continue
|
||||
subtitles.setdefault(sub.get('lang_key', 'en'), []).append({
|
||||
'ext': 'srt',
|
||||
'data': self.json2srt(sub_data)
|
||||
})
|
||||
fetched_urls = set()
|
||||
for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
|
||||
for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
|
||||
if url in fetched_urls:
|
||||
continue
|
||||
fetched_urls.add(url)
|
||||
sub_ext = determine_ext(url)
|
||||
sub_lang = sub.get('lang_key') or 'en'
|
||||
|
||||
if sub_ext == 'ass':
|
||||
subtitles.setdefault(sub_lang, []).append({
|
||||
'ext': 'ass',
|
||||
'url': url,
|
||||
})
|
||||
elif sub_ext == 'json':
|
||||
sub_data = self._download_json(
|
||||
url, ep_id or aid, fatal=False,
|
||||
note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
|
||||
errnote='Unable to download subtitles')
|
||||
|
||||
if sub_data:
|
||||
subtitles.setdefault(sub_lang, []).append({
|
||||
'ext': 'srt',
|
||||
'data': self.json2srt(sub_data),
|
||||
})
|
||||
else:
|
||||
self.report_warning('Unexpected subtitle extension', ep_id or aid)
|
||||
|
||||
return subtitles
|
||||
|
||||
def _get_formats(self, *, ep_id=None, aid=None):
|
||||
@ -1716,7 +1788,9 @@ class BiliIntlBaseIE(InfoExtractor):
|
||||
def _parse_video_metadata(self, video_data):
    """Build the basic info-dict fields from a video data mapping.

    The title prefers ``title_display`` over ``title``. The episode number is
    taken from a leading ``E<digits>`` in ``title_display`` (either the whole
    string or followed by ' - '), and is None when that pattern is absent.
    """
    display_title = video_data.get('title_display')
    episode_number = self._search_regex(
        r'^E(\d+)(?:$| - )', display_title or '', 'episode number', default=None)

    return {
        'title': display_title or video_data.get('title'),
        'description': video_data.get('desc'),
        'thumbnail': video_data.get('cover'),
        'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
        'episode_number': int_or_none(episode_number),
    }
|
||||
@ -1813,17 +1887,6 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
'episode_number': 140,
|
||||
},
|
||||
'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
|
||||
}, {
|
||||
'url': 'https://www.bilibili.tv/en/video/2041863208',
|
||||
'info_dict': {
|
||||
'id': '2041863208',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1670874843,
|
||||
'description': 'Scheduled for April 2023.\nStudio: ufotable',
|
||||
'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
|
||||
'upload_date': '20221212',
|
||||
'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
|
||||
},
|
||||
}, {
|
||||
# episode comment extraction
|
||||
'url': 'https://www.bilibili.tv/en/play/34580/340317',
|
||||
@ -1864,9 +1927,9 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
|
||||
'timestamp': 1667891924,
|
||||
'upload_date': '20221108',
|
||||
'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
|
||||
'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
|
||||
'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
|
||||
},
|
||||
'params': {
|
||||
'getcomments': True
|
||||
@ -1929,10 +1992,12 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
|
||||
# XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
|
||||
return merge_dicts(
|
||||
self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), {
|
||||
'title': self._html_search_meta('og:title', webpage),
|
||||
'description': self._html_search_meta('og:description', webpage)
|
||||
})
|
||||
self._parse_video_metadata(video_data), {
|
||||
'title': get_element_by_class(
|
||||
'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
|
||||
'description': get_element_by_class(
|
||||
'bstar-meta__desc', webpage) or self._html_search_meta('og:description'),
|
||||
}, self._search_json_ld(webpage, video_id, default={}))
|
||||
|
||||
def _get_comments_reply(self, root_id, next_id=0, display_id=None):
|
||||
comment_api_raw_data = self._download_json(
|
||||
@ -2020,7 +2085,8 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
'formats': self._get_formats(ep_id=ep_id, aid=aid),
|
||||
'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
|
||||
'chapters': chapters,
|
||||
'__post_extractor': self.extract_comments(video_id, ep_id)
|
||||
'__post_extractor': self.extract_comments(video_id, ep_id),
|
||||
'http_headers': self._HEADERS,
|
||||
}
|
||||
|
||||
|
||||
|
209
yt_dlp/extractor/boosty.py
Normal file
209
yt_dlp/extractor/boosty.py
Normal file
@ -0,0 +1,209 @@
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BoostyIE(InfoExtractor):
    """Extractor for boosty.to posts.

    A post may contain several media items; hosted ``ok_video`` items are
    extracted directly, external ``video`` items are handed over to YoutubeIE.
    """

    _VALID_URL = r'https?://(?:www\.)?boosty\.to/(?P<user>[^/#?]+)/posts/(?P<post_id>[^/#?]+)'
    _TESTS = [{
        # single ok_video
        'url': 'https://boosty.to/kuplinov/posts/e55d050c-e3bb-4873-a7db-ac7a49b40c38',
        'info_dict': {
            'id': 'd7473824-352e-48e2-ae53-d4aa39459968',
            'title': 'phasma_3',
            'channel': 'Kuplinov',
            'channel_id': '7958701',
            'timestamp': 1655031975,
            'upload_date': '20220612',
            'release_timestamp': 1655049000,
            'release_date': '20220612',
            'modified_timestamp': 1668680993,
            'modified_date': '20221117',
            'tags': ['куплинов', 'phasmophobia'],
            'like_count': int,
            'ext': 'mp4',
            'duration': 105,
            'view_count': int,
            'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
        },
    }, {
        # multiple ok_video
        'url': 'https://boosty.to/maddyson/posts/0c652798-3b35-471f-8b48-a76a0b28736f',
        'info_dict': {
            'id': '0c652798-3b35-471f-8b48-a76a0b28736f',
            'title': 'то что не пропустил юта6',
            'channel': 'Илья Давыдов',
            'channel_id': '6808257',
            'timestamp': 1694017040,
            'upload_date': '20230906',
            'release_timestamp': 1694017040,
            'release_date': '20230906',
            'modified_timestamp': 1694071178,
            'modified_date': '20230907',
            'like_count': int,
        },
        'playlist_count': 3,
        'playlist': [{
            'info_dict': {
                'id': 'cc325a9f-a563-41c6-bf47-516c1b506c9a',
                'title': 'то что не пропустил юта6',
                'channel': 'Илья Давыдов',
                'channel_id': '6808257',
                'timestamp': 1694017040,
                'upload_date': '20230906',
                'release_timestamp': 1694017040,
                'release_date': '20230906',
                'modified_timestamp': 1694071178,
                'modified_date': '20230907',
                'like_count': int,
                'ext': 'mp4',
                'duration': 31204,
                'view_count': int,
                'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
            },
        }, {
            'info_dict': {
                'id': 'd07b0a72-9493-4512-b54e-55ce468fd4b7',
                'title': 'то что не пропустил юта6',
                'channel': 'Илья Давыдов',
                'channel_id': '6808257',
                'timestamp': 1694017040,
                'upload_date': '20230906',
                'release_timestamp': 1694017040,
                'release_date': '20230906',
                'modified_timestamp': 1694071178,
                'modified_date': '20230907',
                'like_count': int,
                'ext': 'mp4',
                'duration': 25704,
                'view_count': int,
                'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
            },
        }, {
            'info_dict': {
                'id': '4a3bba32-78c8-422a-9432-2791aff60b42',
                'title': 'то что не пропустил юта6',
                'channel': 'Илья Давыдов',
                'channel_id': '6808257',
                'timestamp': 1694017040,
                'upload_date': '20230906',
                'release_timestamp': 1694017040,
                'release_date': '20230906',
                'modified_timestamp': 1694071178,
                'modified_date': '20230907',
                'like_count': int,
                'ext': 'mp4',
                'duration': 31867,
                'view_count': int,
                'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
            },
        }],
    }, {
        # single external video (youtube)
        'url': 'https://boosty.to/denischuzhoy/posts/6094a487-bcec-4cf8-a453-43313b463c38',
        'info_dict': {
            'id': 'EXelTnve5lY',
            'title': 'Послание Президента Федеральному Собранию | Класс народа',
            'upload_date': '20210425',
            'channel': 'Денис Чужой',
            'tags': 'count:10',
            'like_count': int,
            'ext': 'mp4',
            'duration': 816,
            'view_count': int,
            'thumbnail': r're:^https://i\.ytimg\.com/',
            'age_limit': 0,
            'availability': 'public',
            'categories': list,
            'channel_follower_count': int,
            'channel_id': 'UCCzVNbWZfYpBfyofCCUD_0w',
            'channel_is_verified': bool,
            'channel_url': r're:^https://www\.youtube\.com/',
            'comment_count': int,
            'description': str,
            'heatmap': 'count:100',
            'live_status': str,
            'playable_in_embed': bool,
            'uploader': str,
            'uploader_id': str,
            'uploader_url': r're:^https://www\.youtube\.com/',
        },
    }]

    # Progressive MP4 format types, ordered from lowest to highest quality
    _MP4_TYPES = ('tiny', 'lowest', 'low', 'medium', 'high', 'full_hd', 'quad_hd', 'ultra_hd')

    def _extract_formats(self, player_urls, video_id):
        """Turn the ``playerUrls`` list of an ok_video item into formats.

        Entries without a valid URL are ignored. HLS and DASH entries are
        expanded through their manifests (non-fatally), entries whose type is
        in ``_MP4_TYPES`` become direct MP4 formats, and anything else only
        triggers a warning.
        """
        hls_types = ('hls', 'hls_live', 'live_ondemand_hls', 'live_playback_hls')
        dash_types = ('dash', 'dash_live', 'live_playback_dash')
        rank = qualities(self._MP4_TYPES)

        collected = []
        for source in traverse_obj(player_urls, lambda _, v: url_or_none(v['url'])):
            source_url, kind = source['url'], source.get('type')
            if kind in hls_types:
                collected.extend(
                    self._extract_m3u8_formats(source_url, video_id, m3u8_id='hls', fatal=False))
                continue
            if kind in dash_types:
                collected.extend(
                    self._extract_mpd_formats(source_url, video_id, mpd_id='dash', fatal=False))
                continue
            if kind in self._MP4_TYPES:
                collected.append({
                    'url': source_url,
                    'ext': 'mp4',
                    'format_id': kind,
                    'quality': rank(kind),
                })
                continue
            self.report_warning(f'Unknown format type: {kind!r}')
        return collected

    def _real_extract(self, url):
        """Extract every video of a post.

        Returns the single entry when the post holds exactly one video,
        otherwise a playlist carrying the post-level metadata.

        Raises ExtractorError (expected) when the post contains no videos.
        """
        user, post_id = self._match_valid_url(url).group('user', 'post_id')
        post = self._download_json(
            f'https://api.boosty.to/v1/blog/{user}/post/{post_id}', post_id,
            note='Downloading post data', errnote='Unable to download post data')

        post_title = post.get('title')
        if not post_title:
            # The API gave no title, so fall back to the HTML page
            self.report_warning('Unable to extract post title. Falling back to parsing html page')
            webpage = self._download_webpage(url, video_id=post_id)
            post_title = self._og_search_title(webpage, default=None) or self._html_extract_title(webpage)

        post_fields = traverse_obj(post, {
            'channel': ('user', 'name', {str}),
            'channel_id': ('user', 'id', {str_or_none}),
            'timestamp': ('createdAt', {int_or_none}),
            'release_timestamp': ('publishTime', {int_or_none}),
            'modified_timestamp': ('updatedAt', {int_or_none}),
            'tags': ('tags', ..., 'title', {str}),
            'like_count': ('count', 'likes', {int_or_none}),
        })
        common_metadata = {'title': post_title, **post_fields}

        entries = []
        for item in traverse_obj(post, ('data', ..., {dict})):
            item_type = item.get('type')
            if item_type == 'video' and url_or_none(item.get('url')):
                entries.append(self.url_result(item['url'], YoutubeIE))
                continue
            if item_type != 'ok_video':
                continue

            video_id = item.get('id') or post_id
            # Item-level fields override the post-level metadata
            item_fields = traverse_obj(item, {
                'title': ('title', {str}),
                'duration': ('duration', {int_or_none}),
                'view_count': ('viewsCounter', {int_or_none}),
                'thumbnail': (('previewUrl', 'defaultPreview'), {url_or_none}),
            }, get_all=False)
            entries.append({
                'id': video_id,
                'formats': self._extract_formats(item.get('playerUrls'), video_id),
                **common_metadata,
                **item_fields,
            })

        if not entries:
            raise ExtractorError('No videos found', expected=True)
        if len(entries) == 1:
            return entries[0]
        return self.playlist_result(entries, post_id, post_title, **common_metadata)
|
@ -1,6 +1,7 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_resolution,
|
||||
@ -60,6 +61,7 @@ class CCMAIE(InfoExtractor):
|
||||
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
|
||||
'media': media_type,
|
||||
'idint': media_id,
|
||||
'format': 'dm',
|
||||
})
|
||||
|
||||
formats = []
|
||||
@ -69,6 +71,10 @@ class CCMAIE(InfoExtractor):
|
||||
format_url = url_or_none(format_.get('file'))
|
||||
if not format_url:
|
||||
continue
|
||||
if determine_ext(format_url) == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, media_id, mpd_id='dash', fatal=False))
|
||||
continue
|
||||
label = format_.get('label')
|
||||
f = parse_resolution(label)
|
||||
f.update({
|
||||
|
139
yt_dlp/extractor/chzzk.py
Normal file
139
yt_dlp/extractor/chzzk.py
Normal file
@ -0,0 +1,139 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class CHZZKLiveIE(InfoExtractor):
    """Extractor for the live stream of a channel on chzzk.naver.com (``/live/<channel id>``)."""

    IE_NAME = 'chzzk:live'
    _VALID_URL = r'https?://chzzk\.naver\.com/live/(?P<id>[\da-f]+)'
    _TESTS = [{
        'url': 'https://chzzk.naver.com/live/c68b8ef525fb3d2fa146344d84991753',
        'info_dict': {
            'id': 'c68b8ef525fb3d2fa146344d84991753',
            'ext': 'mp4',
            'title': str,
            'channel': '진짜도현',
            'channel_id': 'c68b8ef525fb3d2fa146344d84991753',
            'channel_is_verified': False,
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1705510344,
            'upload_date': '20240117',
            'live_status': 'is_live',
            'view_count': int,
            'concurrent_view_count': int,
        },
        'skip': 'The channel is not currently live',
    }]

    def _real_extract(self, url):
        """
        Fetch the channel's live-detail record and turn it into an info dict.

        Raises an expected ExtractorError when the API reports the status 'CLOSE'.
        """
        channel_id = self._match_id(url)
        api_response = self._download_json(
            f'https://api.chzzk.naver.com/service/v2/channels/{channel_id}/live-detail', channel_id,
            note='Downloading channel info', errnote='Unable to download channel info')
        live_detail = api_response['content']

        if live_detail.get('status') == 'CLOSE':
            raise ExtractorError('The channel is not currently live', expected=True)

        # The playback description arrives as a JSON document serialized into a string field
        live_playback = self._parse_json(live_detail['livePlaybackJson'], channel_id)

        # One thumbnail per advertised "type"; the type string is substituted into
        # the template and is also used as the width when it is numeric
        thumbnail_template = traverse_obj(
            live_playback, ('thumbnail', 'snapshotThumbnailTemplate', {url_or_none}))
        if thumbnail_template and '{type}' in thumbnail_template:
            thumbnails = [{
                'id': width,
                'url': thumbnail_template.replace('{type}', width),
                'width': int_or_none(width),
            } for width in traverse_obj(live_playback, ('thumbnail', 'types', ..., {str}))]
        else:
            thumbnails = []

        formats, subtitles = [], {}
        # Only media entries carrying a valid URL in 'path' are considered
        playable_media = traverse_obj(live_playback, ('media', lambda _, v: url_or_none(v['path'])))
        for media in playable_media:
            low_latency = media.get('mediaId') == 'LLHLS'
            if low_latency:
                m3u8_id = 'hls-ll'
            else:
                m3u8_id = 'hls'
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                media['path'], channel_id, 'mp4', fatal=False, live=True, m3u8_id=m3u8_id)
            for fmt in fmts:
                # Rank the low-latency variants and the "afragalow" audio rendition lower
                if low_latency:
                    fmt['source_preference'] = -2
                if '-afragalow.stream-audio.stream' in fmt['format_id']:
                    fmt['quality'] = -2
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        metadata = traverse_obj(live_detail, {
            'title': ('liveTitle', {str}),
            'timestamp': ('openDate', {functools.partial(parse_iso8601, delimiter=' ')}),
            'concurrent_view_count': ('concurrentUserCount', {int_or_none}),
            'view_count': ('accumulateCount', {int_or_none}),
            'channel': ('channel', 'channelName', {str}),
            'channel_id': ('channel', 'channelId', {str}),
            'channel_is_verified': ('channel', 'verifiedMark', {bool}),
        })

        return {
            'id': channel_id,
            'is_live': True,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            **metadata,
        }
|
||||
|
||||
|
||||
class CHZZKVideoIE(InfoExtractor):
    """Extractor for recorded videos on chzzk.naver.com (``/video/<numeric id>``)."""

    IE_NAME = 'chzzk:video'
    _VALID_URL = r'https?://chzzk\.naver\.com/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://chzzk.naver.com/video/1754',
        'md5': 'b0c0c1bb888d913b93d702b1512c7f06',
        'info_dict': {
            'id': '1754',
            'ext': 'mp4',
            'title': '치지직 테스트 방송',
            'channel': '침착맨',
            'channel_id': 'bb382c2c0cc9fa7c86ab3b037fb5799c',
            'channel_is_verified': False,
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 15577,
            'timestamp': 1702970505.417,
            'upload_date': '20231219',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """
        Fetch the video's metadata record, then its DASH playback manifest.

        The playback endpoint is addressed by the 'videoId' and 'inKey' values
        taken from the metadata record, not by the numeric id in the page URL.
        """
        video_id = self._match_id(url)
        api_response = self._download_json(
            f'https://api.chzzk.naver.com/service/v2/videos/{video_id}', video_id,
            note='Downloading video info', errnote='Unable to download video info')
        video_meta = api_response['content']

        playback_url = f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}'
        playback_query = {
            'key': video_meta['inKey'],
            'env': 'real',
            'lc': 'en_US',
            'cpl': 'en_US',
        }
        formats, subtitles = self._extract_mpd_formats_and_subtitles(
            playback_url, video_id, query=playback_query,
            note='Downloading video playback', errnote='Unable to download video playback')

        metadata = traverse_obj(video_meta, {
            'title': ('videoTitle', {str}),
            'thumbnail': ('thumbnailImageUrl', {url_or_none}),
            # 'publishDateAt' is divided by 1000 to obtain the timestamp
            'timestamp': ('publishDateAt', {functools.partial(float_or_none, scale=1000)}),
            'view_count': ('readCount', {int_or_none}),
            'duration': ('duration', {int_or_none}),
            'channel': ('channel', 'channelName', {str}),
            'channel_id': ('channel', 'channelId', {str}),
            'channel_is_verified': ('channel', 'verifiedMark', {bool}),
        })

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **metadata,
        }
|
@ -67,7 +67,10 @@ class CineverseIE(CineverseBaseIE):
|
||||
html = self._download_webpage(url, video_id)
|
||||
idetails = self._search_nextjs_data(html, video_id)['props']['pageProps']['idetails']
|
||||
|
||||
if idetails.get('err_code') == 1200:
|
||||
err_code = idetails.get('err_code')
|
||||
if err_code == 1002:
|
||||
self.raise_login_required()
|
||||
elif err_code == 1200:
|
||||
self.raise_geo_restricted(
|
||||
'This video is not available from your location due to geo restriction. '
|
||||
'You may be able to bypass it by using the /details/ page instead of the /watch/ page',
|
||||
|
@ -46,15 +46,18 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
video_id.split('.')[1] + '==='), video_id)['sub']
|
||||
manifest_base_url = base_url + 'manifest/video.'
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
manifest_base_url + 'm3u8', video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False))
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_id,
|
||||
'thumbnail': base_url + 'thumbnails/thumbnail.jpg',
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
79
yt_dlp/extractor/cloudycdn.py
Normal file
79
yt_dlp/extractor/cloudycdn.py
Normal file
@ -0,0 +1,79 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class CloudyCDNIE(InfoExtractor):
    """Extractor for media embedded through embed.cloudycdn.services iframes."""

    _VALID_URL = r'(?:https?:)?//embed\.cloudycdn\.services/(?P<site_id>[^/?#]+)/media/(?P<id>[\w-]+)'
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://embed.cloudycdn.services/ltv/media/46k_d23-6000-105?',
        'md5': '64f72a360ca530d5ed89c77646c9eee5',
        'info_dict': {
            'id': '46k_d23-6000-105',
            'ext': 'mp4',
            'timestamp': 1700589151,
            'duration': 1442,
            'upload_date': '20231121',
            'title': 'D23-6000-105_cetstud',
            'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
        }
    }, {
        'url': 'https://embed.cloudycdn.services/izm/media/26e_lv-8-5-1',
        'md5': '798828a479151e2444d8dcfbec76e482',
        'info_dict': {
            'id': '26e_lv-8-5-1',
            'ext': 'mp4',
            'title': 'LV-8-5-1',
            'timestamp': 1669767167,
            'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/488306/placeholder1679423604.jpg',
            'duration': 1205,
            'upload_date': '20221130',
        }
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/',
        'md5': '63074e8e6c84ac2a01f2fb8bf03b8f43',
        'info_dict': {
            'id': 'cqd_lib-2',
            'ext': 'mp4',
            'upload_date': '20230223',
            'duration': 629,
            'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/518407/placeholder1678748124.jpg',
            'timestamp': 1677181513,
            'title': 'LIB-2',
        }
    }]

    def _real_extract(self, url):
        """
        POST to the player endpoint for the site/media pair found in *url* and
        collect HLS formats from every source URL listed in the response.
        """
        mobj = self._match_valid_url(url)
        site_id, video_id = mobj.group('site_id', 'id')

        # The player endpoint is queried with a form-encoded POST body that
        # carries a player version and the embed URL as 'referer'
        post_body = urlencode_postdata({
            'version': '6.4.0',
            'referer': url,
        })
        data = self._download_json(
            f'https://player.cloudycdn.services/player/{site_id}/media/{video_id}/',
            video_id, data=post_body)

        formats, subtitles = [], {}
        source_urls = traverse_obj(data, ('source', 'sources', ..., 'src', {url_or_none}))
        for m3u8_url in source_urls:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        metadata = traverse_obj(data, {
            'title': ('name', {str}),
            'duration': ('duration', {int_or_none}),
            'timestamp': ('upload_date', {parse_iso8601}),
            'thumbnail': ('source', 'poster', {url_or_none}),
        })

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **metadata,
        }
|
@ -247,6 +247,8 @@ class InfoExtractor:
|
||||
(For internal use only)
|
||||
* http_chunk_size Chunk size for HTTP downloads
|
||||
* ffmpeg_args Extra arguments for ffmpeg downloader
|
||||
* is_dash_periods Whether the format is a result of merging
|
||||
multiple DASH periods.
|
||||
RTMP formats can also have the additional fields: page_url,
|
||||
app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
|
||||
rtmp_protocol, rtmp_real_time
|
||||
@ -278,7 +280,7 @@ class InfoExtractor:
|
||||
description: Full video description.
|
||||
uploader: Full name of the video uploader.
|
||||
license: License name the video is licensed under.
|
||||
creator: The creator of the video.
|
||||
creators: List of creators of the video.
|
||||
timestamp: UNIX timestamp of the moment the video was uploaded
|
||||
upload_date: Video upload date in UTC (YYYYMMDD).
|
||||
If not explicitly set, calculated from timestamp
|
||||
@ -432,14 +434,6 @@ class InfoExtractor:
|
||||
Useful for splits and compilations.
|
||||
disc_number: Number of the disc or other physical medium the track belongs to,
|
||||
as an integer.
|
||||
composer: Deprecated; use "composers" instead.
|
||||
Composer(s) of the piece, comma-separated.
|
||||
artist: Deprecated; use "artists" instead.
|
||||
Artist(s) of the track, comma-separated.
|
||||
genre: Deprecated; use "genres" instead.
|
||||
Genre(s) of the track, comma-separated.
|
||||
album_artist: Deprecated; use "album_artists" instead.
|
||||
All artists appeared on the album, comma-separated.
|
||||
|
||||
The following fields should only be set for clips that should be cut from the original video:
|
||||
|
||||
@ -450,6 +444,18 @@ class InfoExtractor:
|
||||
rows: Number of rows in each storyboard fragment, as an integer
|
||||
columns: Number of columns in each storyboard fragment, as an integer
|
||||
|
||||
The following fields are deprecated and should not be set by new code:
|
||||
composer: Use "composers" instead.
|
||||
Composer(s) of the piece, comma-separated.
|
||||
artist: Use "artists" instead.
|
||||
Artist(s) of the track, comma-separated.
|
||||
genre: Use "genres" instead.
|
||||
Genre(s) of the track, comma-separated.
|
||||
album_artist: Use "album_artists" instead.
|
||||
All artists appeared on the album, comma-separated.
|
||||
creator: Use "creators" instead.
|
||||
The creator of the video.
|
||||
|
||||
Unless mentioned otherwise, the fields should be Unicode strings.
|
||||
|
||||
Unless mentioned otherwise, None is equivalent to absence of information.
|
||||
@ -2538,7 +2544,11 @@ class InfoExtractor:
|
||||
self._report_ignoring_subs('DASH')
|
||||
return fmts
|
||||
|
||||
def _extract_mpd_formats_and_subtitles(
|
||||
def _extract_mpd_formats_and_subtitles(self, *args, **kwargs):
    """
    Return (formats, subtitles) for an MPD manifest, merged across all of its periods.

    All arguments are forwarded unchanged to _extract_mpd_periods; the periods
    it produces are then combined by _merge_mpd_periods.
    """
    return self._merge_mpd_periods(self._extract_mpd_periods(*args, **kwargs))
|
||||
|
||||
def _extract_mpd_periods(
|
||||
self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
|
||||
fatal=True, data=None, headers={}, query={}):
|
||||
|
||||
@ -2551,17 +2561,16 @@ class InfoExtractor:
|
||||
errnote='Failed to download MPD manifest' if errnote is None else errnote,
|
||||
fatal=fatal, data=data, headers=headers, query=query)
|
||||
if res is False:
|
||||
return [], {}
|
||||
return []
|
||||
mpd_doc, urlh = res
|
||||
if mpd_doc is None:
|
||||
return [], {}
|
||||
return []
|
||||
|
||||
# We could have been redirected to a new url when we retrieved our mpd file.
|
||||
mpd_url = urlh.url
|
||||
mpd_base_url = base_url(mpd_url)
|
||||
|
||||
return self._parse_mpd_formats_and_subtitles(
|
||||
mpd_doc, mpd_id, mpd_base_url, mpd_url)
|
||||
return self._parse_mpd_periods(mpd_doc, mpd_id, mpd_base_url, mpd_url)
|
||||
|
||||
def _parse_mpd_formats(self, *args, **kwargs):
|
||||
fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
|
||||
@ -2569,8 +2578,39 @@ class InfoExtractor:
|
||||
self._report_ignoring_subs('DASH')
|
||||
return fmts
|
||||
|
||||
def _parse_mpd_formats_and_subtitles(
|
||||
self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
|
||||
def _parse_mpd_formats_and_subtitles(self, *args, **kwargs):
    """
    Return (formats, subtitles) for an already loaded MPD document, merged across all of its periods.

    All arguments are forwarded unchanged to _parse_mpd_periods; the periods
    it produces are then combined by _merge_mpd_periods.
    """
    return self._merge_mpd_periods(self._parse_mpd_periods(*args, **kwargs))
|
||||
|
||||
def _merge_mpd_periods(self, periods):
|
||||
"""
|
||||
Combine all formats and subtitles from an MPD manifest into a single list,
|
||||
by concatenate streams with similar formats.
|
||||
"""
|
||||
formats, subtitles = {}, {}
|
||||
for period in periods:
|
||||
for f in period['formats']:
|
||||
assert 'is_dash_periods' not in f, 'format already processed'
|
||||
f['is_dash_periods'] = True
|
||||
format_key = tuple(v for k, v in f.items() if k not in (
|
||||
('format_id', 'fragments', 'manifest_stream_number')))
|
||||
if format_key not in formats:
|
||||
formats[format_key] = f
|
||||
elif 'fragments' in f:
|
||||
formats[format_key].setdefault('fragments', []).extend(f['fragments'])
|
||||
|
||||
if subtitles and period['subtitles']:
|
||||
self.report_warning(bug_reports_message(
|
||||
'Found subtitles in multiple periods in the DASH manifest; '
|
||||
'if part of the subtitles are missing,'
|
||||
), only_once=True)
|
||||
|
||||
for sub_lang, sub_info in period['subtitles'].items():
|
||||
subtitles.setdefault(sub_lang, []).extend(sub_info)
|
||||
|
||||
return list(formats.values()), subtitles
|
||||
|
||||
def _parse_mpd_periods(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
|
||||
"""
|
||||
Parse formats from MPD manifest.
|
||||
References:
|
||||
@ -2649,9 +2689,13 @@ class InfoExtractor:
|
||||
return ms_info
|
||||
|
||||
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
||||
formats, subtitles = [], {}
|
||||
stream_numbers = collections.defaultdict(int)
|
||||
for period in mpd_doc.findall(_add_ns('Period')):
|
||||
for period_idx, period in enumerate(mpd_doc.findall(_add_ns('Period'))):
|
||||
period_entry = {
|
||||
'id': period.get('id', f'period-{period_idx}'),
|
||||
'formats': [],
|
||||
'subtitles': collections.defaultdict(list),
|
||||
}
|
||||
period_duration = parse_duration(period.get('duration')) or mpd_duration
|
||||
period_ms_info = extract_multisegment_info(period, {
|
||||
'start_number': 1,
|
||||
@ -2901,11 +2945,10 @@ class InfoExtractor:
|
||||
if content_type in ('video', 'audio', 'image/jpeg'):
|
||||
f['manifest_stream_number'] = stream_numbers[f['url']]
|
||||
stream_numbers[f['url']] += 1
|
||||
formats.append(f)
|
||||
period_entry['formats'].append(f)
|
||||
elif content_type == 'text':
|
||||
subtitles.setdefault(lang or 'und', []).append(f)
|
||||
|
||||
return formats, subtitles
|
||||
period_entry['subtitles'][lang or 'und'].append(f)
|
||||
yield period_entry
|
||||
|
||||
def _extract_ism_formats(self, *args, **kwargs):
|
||||
fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
|
||||
|
@ -33,10 +33,7 @@ class CrooksAndLiarsIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://embed.crooksandliars.com/embed/%s' % video_id, video_id)
|
||||
|
||||
manifest = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+manifest\s*=\s*({.+?})\n', webpage, 'manifest JSON'),
|
||||
video_id)
|
||||
manifest = self._search_json(r'var\s+manifest\s*=', webpage, 'manifest JSON', video_id)
|
||||
|
||||
quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high'))
|
||||
|
||||
|
72
yt_dlp/extractor/elementorembed.py
Normal file
72
yt_dlp/extractor/elementorembed.py
Normal file
@ -0,0 +1,72 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import unescapeHTML, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ElementorEmbedIE(InfoExtractor):
    """Embed-only extractor for Elementor video / video-playlist widgets found in a webpage."""

    # Never matched against URLs directly; only used through _extract_from_webpage
    _VALID_URL = False
    _WEBPAGE_TESTS = [{
        'url': 'https://capitaltv.cy/2023/12/14/υγεια-και-ζωη-14-12-2023-δρ-ξενια-κωσταντινιδο/',
        'info_dict': {
            'id': 'KgzuxwuQwM4',
            'ext': 'mp4',
            'title': 'ΥΓΕΙΑ ΚΑΙ ΖΩΗ 14 12 2023 ΔΡ ΞΕΝΙΑ ΚΩΣΤΑΝΤΙΝΙΔΟΥ',
            'thumbnail': 'https://i.ytimg.com/vi/KgzuxwuQwM4/maxresdefault.jpg',
            'playable_in_embed': True,
            'tags': 'count:16',
            'like_count': int,
            'channel': 'Capital TV Cyprus',
            'channel_id': 'UCR8LwVKTLGEXt4ZAErpCMrg',
            'availability': 'public',
            'description': 'md5:7a3308a22881aea4612358c4ba121f77',
            'duration': 2891,
            'upload_date': '20231214',
            'uploader_id': '@capitaltvcyprus6389',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCR8LwVKTLGEXt4ZAErpCMrg',
            'uploader_url': 'https://www.youtube.com/@capitaltvcyprus6389',
            'uploader': 'Capital TV Cyprus',
            'age_limit': 0,
            'categories': ['News & Politics'],
            'view_count': int,
            'channel_follower_count': int,
        },
    }, {
        'url': 'https://elementor.com/academy/theme-builder-collection/?playlist=76011151&video=9e59909',
        'info_dict': {
            'id': '?playlist=76011151&video=9e59909',
            'title': 'Theme Builder Collection - Academy',
            'age_limit': 0,
            'timestamp': 1702196984.0,
            'upload_date': '20231210',
            'description': 'md5:7f52c52715ee9e54fd7f82210511673d',
            'thumbnail': 'https://elementor.com/academy/wp-content/uploads/2021/07/Theme-Builder-1.png',
        },
        'playlist_count': 11,
        'params': {
            'skip_download': True,
        },
    }]
    _WIDGET_REGEX = r'<div[^>]+class="[^"]*elementor-widget-video(?:-playlist)?[^"]*"[^>]*data-settings="([^"]*)"'

    def _extract_from_webpage(self, url, webpage):
        """
        Yield one result per video referenced by the widgets' ``data-settings`` attributes.

        The attribute value is HTML-unescaped and parsed as JSON (non-fatally).
        A widget may carry a single 'youtube_url', and/or a list of 'tabs' that
        each reference a YouTube URL, a Vimeo URL and/or directly hosted files.
        """
        for widget in re.finditer(self._WIDGET_REGEX, webpage):
            data = self._parse_json(widget.group(1), None, fatal=False, transform_source=unescapeHTML)

            widget_youtube_url = traverse_obj(data, ('youtube_url', {url_or_none}))
            if widget_youtube_url:
                yield self.url_result(widget_youtube_url, ie=YoutubeIE)

            # Playlist widgets: only tabs with a truthy '_id' are considered
            for video in traverse_obj(data, ('tabs', lambda _, v: v['_id'], {dict})):
                tab_youtube_url = traverse_obj(video, ('youtube_url', {url_or_none}))
                if tab_youtube_url:
                    yield self.url_result(tab_youtube_url, ie=YoutubeIE)

                tab_vimeo_url = traverse_obj(video, ('vimeo_url', {url_or_none}))
                if tab_vimeo_url:
                    yield self.url_result(tab_vimeo_url, ie=VimeoIE)

                direct_urls = traverse_obj(video, (('hosted_url', 'external_url'), 'url', {url_or_none}))
                for direct_url in direct_urls:
                    yield {
                        'id': video['_id'],
                        'url': direct_url,
                        'title': video.get('title'),
                    }
|
@ -1,8 +1,10 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
encode_base_n,
|
||||
ExtractorError,
|
||||
encode_base_n,
|
||||
get_elements_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
@ -81,6 +83,7 @@ class EpornerIE(InfoExtractor):
|
||||
sources = video['sources']
|
||||
|
||||
formats = []
|
||||
has_av1 = bool(get_elements_by_class('download-av1', webpage))
|
||||
for kind, formats_dict in sources.items():
|
||||
if not isinstance(formats_dict, dict):
|
||||
continue
|
||||
@ -106,6 +109,14 @@ class EpornerIE(InfoExtractor):
|
||||
'height': height,
|
||||
'fps': fps,
|
||||
})
|
||||
if has_av1:
|
||||
formats.append({
|
||||
'url': src.replace('.mp4', '-av1.mp4'),
|
||||
'format_id': join_nonempty('av1', format_id),
|
||||
'height': height,
|
||||
'fps': fps,
|
||||
'vcodec': 'av1',
|
||||
})
|
||||
|
||||
json_ld = self._search_json_ld(webpage, display_id, default={})
|
||||
|
||||
|
224
yt_dlp/extractor/err.py
Normal file
224
yt_dlp/extractor/err.py
Normal file
@ -0,0 +1,224 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ERRJupiterIE(InfoExtractor):
    """Extractor for jupiter.err.ee, jupiterpluss.err.ee and lasteekraan.err.ee content pages."""

    _VALID_URL = r'https?://(?:jupiter(?:pluss)?|lasteekraan)\.err\.ee/(?P<id>\d+)'
    _TESTS = [{
        'note': 'Jupiter: Movie: siin-me-oleme',
        'url': 'https://jupiter.err.ee/1211107/siin-me-oleme',
        'md5': '9b45d1682a98853acaa1e1b0c791f425',
        'info_dict': {
            'id': '1211107',
            'ext': 'mp4',
            'title': 'Siin me oleme!',
            'alt_title': '',
            'description': 'md5:1825b795f5f7584241aeb59e5bbb4f70',
            'release_date': '20231226',
            'upload_date': '20201217',
            'modified_date': '20201217',
            'release_timestamp': 1703577600,
            'timestamp': 1608210000,
            'modified_timestamp': 1608220800,
            'release_year': 1978,
        },
    }, {
        'note': 'Jupiter: Series: Impulss',
        'url': 'https://jupiter.err.ee/1609145945/impulss',
        'md5': 'a378486df07ed1ba74e46cc861886243',
        'info_dict': {
            'id': '1609145945',
            'ext': 'mp4',
            'title': 'Impulss',
            'alt_title': 'Loteriipilet hooldekodusse',
            'description': 'md5:fa8a2ed0cdccb130211513443ee4d571',
            'release_date': '20231107',
            'upload_date': '20231026',
            'modified_date': '20231118',
            'release_timestamp': 1699380000,
            'timestamp': 1698327601,
            'modified_timestamp': 1700311802,
            'series': 'Impulss',
            'season': 'Season 1',
            'season_number': 1,
            'episode': 'Loteriipilet hooldekodusse',
            'episode_number': 6,
            'series_id': '1609108187',
            'release_year': 2023,
            'episode_id': '1609145945',
        },
    }, {
        'note': 'Jupiter: Radio Show: mnemoturniir episode',
        'url': 'https://jupiter.err.ee/1037919/mnemoturniir',
        'md5': 'f1eb95fe66f9620ff84e81bbac37076a',
        'info_dict': {
            'id': '1037919',
            'ext': 'm4a',
            'title': 'Mnemoturniir',
            'alt_title': '',
            'description': 'md5:626db52394e7583c26ab74d6a34d9982',
            'release_date': '20240121',
            'upload_date': '20240108',
            'modified_date': '20240121',
            'release_timestamp': 1705827900,
            'timestamp': 1704675602,
            'modified_timestamp': 1705827601,
            'series': 'Mnemoturniir',
            'season': 'Season 0',
            'season_number': 0,
            'episode': 'Episode 0',
            'episode_number': 0,
            'series_id': '1037919',
            'release_year': 2024,
            'episode_id': '1609215101',
        },
    }, {
        'note': 'Jupiter+: Clip: bolee-zelenyj-tallinn',
        'url': 'https://jupiterpluss.err.ee/1609180445/bolee-zelenyj-tallinn',
        'md5': '1b812270c4daf6ce51c06bfeaf33ed95',
        'info_dict': {
            'id': '1609180445',
            'ext': 'mp4',
            'title': 'Более зеленый Таллинн',
            'alt_title': '',
            'description': 'md5:fd34d9bf939c28c4a725b19a7f0d6320',
            'release_date': '20231224',
            'upload_date': '20231130',
            'modified_date': '20231207',
            'release_timestamp': 1703423400,
            'timestamp': 1701338400,
            'modified_timestamp': 1701967200,
            'release_year': 2023,
        },
    }, {
        'note': 'Jupiter+: Series: The Sniffer',
        'url': 'https://jupiterpluss.err.ee/1608311387/njuhach',
        'md5': '2abdeb7131ce551bce49e8d0cea08536',
        'info_dict': {
            'id': '1608311387',
            'ext': 'mp4',
            'title': 'Нюхач',
            'alt_title': '',
            'description': 'md5:8c5c7d8f32ec6e54cd498c9e59ca83bc',
            'release_date': '20230601',
            'upload_date': '20210818',
            'modified_date': '20210903',
            'release_timestamp': 1685633400,
            'timestamp': 1629318000,
            'modified_timestamp': 1630686000,
            'release_year': 2013,
            'episode': 'Episode 1',
            'episode_id': '1608311390',
            'episode_number': 1,
            'season': 'Season 1',
            'season_number': 1,
            'series': 'Нюхач',
            'series_id': '1608311387',
        },
    }, {
        'note': 'Jupiter+: Podcast: lesnye-istorii-aisty',
        'url': 'https://jupiterpluss.err.ee/1608990335/lesnye-istorii-aisty',
        'md5': '8b46d7e4510b254a14b7a52211b5bf96',
        'info_dict': {
            'id': '1608990335',
            'ext': 'm4a',
            'title': 'Лесные истории | Аисты',
            'alt_title': '',
            'description': 'md5:065e721623e271e7a63e6540d409ca6b',
            'release_date': '20230609',
            'upload_date': '20230527',
            'modified_date': '20230608',
            'release_timestamp': 1686308700,
            'timestamp': 1685145600,
            'modified_timestamp': 1686252600,
            'release_year': 2023,
            'episode': 'Episode 0',
            'episode_id': '1608990335',
            'episode_number': 0,
            'season': 'Season 0',
            'season_number': 0,
            'series': 'Лесные истории | Аисты',
            'series_id': '1037497',
        }
    }, {
        'note': 'Lasteekraan: Pätu',
        'url': 'https://lasteekraan.err.ee/1092243/patu',
        'md5': 'a67eb9b9bcb3d201718c15d1638edf77',
        'info_dict': {
            'id': '1092243',
            'ext': 'mp4',
            'title': 'Pätu',
            'alt_title': '',
            'description': 'md5:64a7b5a80afd7042d3f8ec48c77befd9',
            'release_date': '20230614',
            'upload_date': '20200520',
            'modified_date': '20200520',
            'release_timestamp': 1686745800,
            'timestamp': 1589975640,
            'modified_timestamp': 1589975640,
            'release_year': 1990,
            'episode': 'Episode 1',
            'episode_id': '1092243',
            'episode_number': 1,
            'season': 'Season 1',
            'season_number': 1,
            'series': 'Pätu',
            'series_id': '1092236',
        },
    }]

    def _real_extract(self, url):
        """
        Fetch the content page data for the id in *url* and build the info dict.

        Formats come from the first media entry: its HLS and DASH manifests and,
        when present, a plain 'file' URL. Series/episode fields are only added
        when the content's 'type' is 'episode'.
        """
        video_id = self._match_id(url)
        response = self._download_json(
            'https://services.err.ee/api/v2/vodContent/getContentPageData', video_id,
            query={'contentId': video_id})
        data = response['data']['mainContent']

        # Only the first dict found under 'medias' is used
        media_data = traverse_obj(data, ('medias', ..., {dict}), get_all=False)
        if traverse_obj(media_data, ('restrictions', 'drm', {bool})):
            self.report_drm(video_id)

        formats, subtitles = [], {}

        # The manifest keys may repeat the same URL, hence the de-duplication via set()
        hls_urls = set(traverse_obj(media_data, ('src', ('hls', 'hls2', 'hlsNew'), {url_or_none})))
        for hls_url in hls_urls:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        dash_urls = set(traverse_obj(media_data, ('src', ('dash', 'dashNew'), {url_or_none})))
        for dash_url in dash_urls:
            fmts, subs = self._extract_mpd_formats_and_subtitles(
                dash_url, video_id, mpd_id='dash', fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        file_url = traverse_obj(media_data, ('src', 'file', {url_or_none}))
        if file_url:
            formats.append({
                'url': file_url,
                'format_id': 'http',
            })

        info = {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            # get_all=False: for the branching paths the first usable value wins
            **traverse_obj(data, {
                'title': ('heading', {str}),
                'alt_title': ('subHeading', {str}),
                'description': (('lead', 'body'), {clean_html}, {lambda x: x or None}),
                'timestamp': ('created', {int_or_none}),
                'modified_timestamp': ('updated', {int_or_none}),
                'release_timestamp': (('scheduleStart', 'publicStart'), {int_or_none}),
                'release_year': ('year', {int_or_none}),
            }, get_all=False),
        }

        if data.get('type') == 'episode':
            info.update(traverse_obj(data, {
                'series': ('heading', {str}),
                'series_id': ('rootContentId', {str_or_none}),
                'episode': ('subHeading', {str}),
                'season_number': ('season', {int_or_none}),
                'episode_number': ('episode', {int_or_none}),
                'episode_id': ('id', {str_or_none}),
            }))

        return info
|
@ -20,6 +20,7 @@ from ..utils import (
|
||||
get_element_by_id,
|
||||
get_first,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
merge_dicts,
|
||||
parse_count,
|
||||
@ -43,6 +44,7 @@ class FacebookIE(InfoExtractor):
|
||||
(?:[^#]*?\#!/)?
|
||||
(?:
|
||||
(?:
|
||||
permalink\.php|
|
||||
video/video\.php|
|
||||
photo\.php|
|
||||
video\.php|
|
||||
@ -52,12 +54,13 @@ class FacebookIE(InfoExtractor):
|
||||
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
||||
[^/]+/videos/(?:[^/]+/)?|
|
||||
[^/]+/posts/|
|
||||
events/(?:[^/]+/)?|
|
||||
groups/[^/]+/(?:permalink|posts)/|
|
||||
watchparty/
|
||||
)|
|
||||
facebook:
|
||||
)
|
||||
(?P<id>[0-9]+)
|
||||
(?P<id>pfbid[A-Za-z0-9]+|\d+)
|
||||
'''
|
||||
_EMBED_REGEX = [
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
|
||||
@ -247,6 +250,41 @@ class FacebookIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'duration': 148.435,
|
||||
},
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].styles.attachment.media
|
||||
'url': 'https://www.facebook.com/attn/posts/pfbid0j1Czf2gGDVqeQ8KiMLFm3pWN8GxsQmeRrVhimWDzMuKQoR8r4b1knNsejELmUgyhl',
|
||||
'info_dict': {
|
||||
'id': '6968553779868435',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:2f2fcf93e97ac00244fe64521bbdb0cb',
|
||||
'uploader': 'ATTN:',
|
||||
'upload_date': '20231207',
|
||||
'title': 'ATTN:',
|
||||
'duration': 132.675,
|
||||
'uploader_id': '100064451419378',
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'timestamp': 1701975646,
|
||||
},
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].styles.attachment.media
|
||||
'url': 'https://www.facebook.com/permalink.php?story_fbid=pfbid0fqQuVEQyXRa9Dp4RcaTR14KHU3uULHV1EK7eckNXSH63JMuoALsAvVCJ97zAGitil&id=100068861234290',
|
||||
'info_dict': {
|
||||
'id': '270103405756416',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lela Evans',
|
||||
'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader': 'Lela Evans',
|
||||
'uploader_id': 'pfbid0shZJipuigyy5mqrUJn9ub5LJFWNHvan5prtyi3LrDuuuJ4NwrURgnQHYR9fywBepl',
|
||||
'upload_date': '20231228',
|
||||
'timestamp': 1703804085,
|
||||
'duration': 394.347,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/story.php?story_fbid=pfbid0Fnzhm8UuzjBYpPMNFzaSpFE9UmLdU4fJN8qTANi1Dmtj5q7DNrL5NERXfsAzDEV7l&id=100073071055552',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||
'only_matching': True,
|
||||
@ -362,6 +400,18 @@ class FacebookIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
# data.event.cover_media_renderer.cover_video
|
||||
'url': 'https://m.facebook.com/events/1509582499515440',
|
||||
'info_dict': {
|
||||
'id': '637246984455045',
|
||||
'ext': 'mp4',
|
||||
'title': 'ANALISI IN CAMPO OSCURO " Coaguli nel sangue dei vaccinati"',
|
||||
'description': 'Other event by Comitato Liberi Pensatori on Tuesday, October 18 2022',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader': 'Comitato Liberi Pensatori',
|
||||
'uploader_id': '100065709540881',
|
||||
},
|
||||
}]
|
||||
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
|
||||
_api_config = {
|
||||
@ -436,38 +486,10 @@ class FacebookIE(InfoExtractor):
|
||||
r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
|
||||
post = traverse_obj(post_data, (
|
||||
..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
|
||||
|
||||
automatic_captions, subtitles = {}, {}
|
||||
subs_data = traverse_obj(post, (..., 'video', ..., 'attachments', ..., lambda k, v: (
|
||||
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')))
|
||||
is_video_broadcast = get_first(subs_data, 'is_video_broadcast', expected_type=bool)
|
||||
captions = get_first(subs_data, 'video_available_captions_locales', 'captions_url')
|
||||
if url_or_none(captions): # if subs_data only had a 'captions_url'
|
||||
locale = self._html_search_meta(['og:locale', 'twitter:locale'], webpage, 'locale', default='en_US')
|
||||
subtitles[locale] = [{'url': captions}]
|
||||
# or else subs_data had 'video_available_captions_locales', a list of dicts
|
||||
for caption in traverse_obj(captions, (
|
||||
{lambda x: sorted(x, key=lambda c: c['locale'])}, lambda _, v: v['captions_url'])
|
||||
):
|
||||
lang = caption.get('localized_language') or ''
|
||||
subs = {
|
||||
'url': caption['captions_url'],
|
||||
'name': format_field(caption, 'localized_country', f'{lang} (%s)', default=lang),
|
||||
}
|
||||
if caption.get('localized_creation_method') or is_video_broadcast:
|
||||
automatic_captions.setdefault(caption['locale'], []).append(subs)
|
||||
else:
|
||||
subtitles.setdefault(caption['locale'], []).append(subs)
|
||||
|
||||
media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
|
||||
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
|
||||
title = get_first(media, ('title', 'text'))
|
||||
description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
|
||||
uploader_data = (
|
||||
get_first(media, ('owner', {dict}))
|
||||
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, ('node', 'actors', ..., {dict})) or {})
|
||||
|
||||
page_title = title or self._html_search_regex((
|
||||
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>',
|
||||
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(?P<content>.*?)</span>',
|
||||
@ -476,11 +498,16 @@ class FacebookIE(InfoExtractor):
|
||||
description = description or self._html_search_meta(
|
||||
['description', 'og:description', 'twitter:description'],
|
||||
webpage, 'description', default=None)
|
||||
uploader_data = (
|
||||
get_first(media, ('owner', {dict}))
|
||||
or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, ('node', 'actors', ..., {dict}))
|
||||
or get_first(post, ('event', 'event_creator', {dict})) or {})
|
||||
uploader = uploader_data.get('name') or (
|
||||
clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
||||
or self._search_regex(
|
||||
(r'ownerName\s*:\s*"([^"]+)"', *self._og_regexes('title')), webpage, 'uploader', fatal=False))
|
||||
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||
'timestamp', default=None))
|
||||
@ -502,8 +529,6 @@ class FacebookIE(InfoExtractor):
|
||||
webpage, 'view count', default=None)),
|
||||
'concurrent_view_count': get_first(post, (
|
||||
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
|
||||
'automatic_captions': automatic_captions,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
info_json_ld = self._search_json_ld(webpage, video_id, default={})
|
||||
@ -545,7 +570,11 @@ class FacebookIE(InfoExtractor):
|
||||
# Downloads with browser's User-Agent are rate limited. Working around
|
||||
# with non-browser User-Agent.
|
||||
for f in info['formats']:
|
||||
# Downloads with browser's User-Agent are rate limited. Working around
|
||||
# with non-browser User-Agent.
|
||||
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
|
||||
# Formats larger than ~500MB will return error 403 unless chunk size is regulated
|
||||
f.setdefault('downloader_options', {})['http_chunk_size'] = 250 << 20
|
||||
|
||||
def extract_relay_data(_filter):
|
||||
return self._parse_json(self._search_regex(
|
||||
@ -555,8 +584,8 @@ class FacebookIE(InfoExtractor):
|
||||
def extract_relay_prefetched_data(_filter):
|
||||
return traverse_obj(extract_relay_data(_filter), (
|
||||
'require', (None, (..., ..., ..., '__bbox', 'require')),
|
||||
lambda _, v: 'RelayPrefetchedStreamCache' in v, ..., ...,
|
||||
'__bbox', 'result', 'data', {dict}), get_all=False) or {}
|
||||
lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v),
|
||||
..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {}
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(self._search_regex([
|
||||
@ -597,6 +626,29 @@ class FacebookIE(InfoExtractor):
|
||||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
|
||||
automatic_captions, subtitles = {}, {}
|
||||
is_broadcast = traverse_obj(video, ('is_video_broadcast', {bool}))
|
||||
for caption in traverse_obj(video, (
|
||||
'video_available_captions_locales',
|
||||
{lambda x: sorted(x, key=lambda c: c['locale'])},
|
||||
lambda _, v: url_or_none(v['captions_url'])
|
||||
)):
|
||||
lang = caption.get('localized_language') or 'und'
|
||||
subs = {
|
||||
'url': caption['captions_url'],
|
||||
'name': format_field(caption, 'localized_country', f'{lang} (%s)', default=lang),
|
||||
}
|
||||
if caption.get('localized_creation_method') or is_broadcast:
|
||||
automatic_captions.setdefault(caption['locale'], []).append(subs)
|
||||
else:
|
||||
subtitles.setdefault(caption['locale'], []).append(subs)
|
||||
captions_url = traverse_obj(video, ('captions_url', {url_or_none}))
|
||||
if captions_url and not automatic_captions and not subtitles:
|
||||
locale = self._html_search_meta(
|
||||
['og:locale', 'twitter:locale'], webpage, 'locale', default='en_US')
|
||||
(automatic_captions if is_broadcast else subtitles)[locale] = [{'url': captions_url}]
|
||||
|
||||
info = {
|
||||
'id': v_id,
|
||||
'formats': formats,
|
||||
@ -606,6 +658,8 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp': traverse_obj(video, 'publish_time', 'creation_time', expected_type=int_or_none),
|
||||
'duration': (float_or_none(video.get('playable_duration_in_ms'), 1000)
|
||||
or float_or_none(video.get('length_in_second'))),
|
||||
'automatic_captions': automatic_captions,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
process_formats(info)
|
||||
description = try_get(video, lambda x: x['savable_description']['text'])
|
||||
@ -640,7 +694,8 @@ class FacebookIE(InfoExtractor):
|
||||
for edge in edges:
|
||||
parse_attachment(edge, key='node')
|
||||
|
||||
video = data.get('video') or {}
|
||||
video = traverse_obj(data, (
|
||||
'event', 'cover_media_renderer', 'cover_video'), 'video', expected_type=dict) or {}
|
||||
if video:
|
||||
attachments = try_get(video, [
|
||||
lambda x: x['story']['attachments'],
|
||||
@ -659,6 +714,9 @@ class FacebookIE(InfoExtractor):
|
||||
# honor precise duration in video info
|
||||
if video_info.get('duration'):
|
||||
webpage_info['duration'] = video_info['duration']
|
||||
# preserve preferred_thumbnail in video info
|
||||
if video_info.get('thumbnail'):
|
||||
webpage_info['thumbnail'] = video_info['thumbnail']
|
||||
return merge_dicts(webpage_info, video_info)
|
||||
|
||||
if not video_data:
|
||||
@ -889,3 +947,114 @@ class FacebookReelIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
f'https://m.facebook.com/watch/?v={video_id}&_rdr', FacebookIE, video_id)
|
||||
|
||||
|
||||
class FacebookAdsIE(InfoExtractor):
    # Extracts the video creative(s) of a single Facebook Ad Library entry.
    # An ad with one video yields a single video result; an ad with several
    # videos/cards yields a playlist.
    _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/ads/library/?\?(?:[^#]+&)?id=(?P<id>\d+)'
    IE_NAME = 'facebook:ads'

    _TESTS = [{
        'url': 'https://www.facebook.com/ads/library/?id=899206155126718',
        'info_dict': {
            'id': '899206155126718',
            'ext': 'mp4',
            'title': 'video by Kandao',
            'uploader': 'Kandao',
            'uploader_id': '774114102743284',
            'uploader_url': r're:^https?://.*',
            'timestamp': 1702548330,
            'thumbnail': r're:^https?://.*',
            'upload_date': '20231214',
            'like_count': int,
        }
    }, {
        'url': 'https://www.facebook.com/ads/library/?id=893637265423481',
        'info_dict': {
            'id': '893637265423481',
            'title': 'Jusqu\u2019\u00e0 -25% sur une s\u00e9lection de vins p\u00e9tillants italiens ',
            'uploader': 'Eataly Paris Marais',
            'uploader_id': '2086668958314152',
            'uploader_url': r're:^https?://.*',
            'timestamp': 1703571529,
            'upload_date': '20231226',
            'like_count': int,
        },
        'playlist_count': 3,
    }, {
        'url': 'https://es-la.facebook.com/ads/library/?id=901230958115569',
        'only_matching': True,
    }, {
        'url': 'https://m.facebook.com/ads/library/?id=901230958115569',
        'only_matching': True,
    }]

    # Maps each URL key of a video/card dict to its (format_id, format_note).
    # The key order of this mapping is what `qualities()` is given to rank
    # the resulting formats.
    _FORMATS_MAP = {
        'watermarked_video_sd_url': ('sd-wmk', 'SD, watermarked'),
        'video_sd_url': ('sd', None),
        'watermarked_video_hd_url': ('hd-wmk', 'HD, watermarked'),
        'video_hd_url': ('hd', None),
    }

    def _extract_formats(self, video_dict):
        # Returns one format per known URL key of `video_dict` whose value is
        # a valid URL; unknown keys and non-URL values are ignored.
        # The ranking function does not depend on the item, so build it once.
        quality = qualities(tuple(self._FORMATS_MAP))

        formats = []
        for format_key, format_url in traverse_obj(video_dict, (
            {dict.items}, lambda _, v: v[0] in self._FORMATS_MAP and url_or_none(v[1])
        )):
            format_id, format_note = self._FORMATS_MAP[format_key]
            formats.append({
                'format_id': format_id,
                'format_note': format_note,
                'url': format_url,
                'ext': 'mp4',
                'quality': quality(format_key),
            })
        return formats

    def _real_extract(self, url):
        # Returns a single-video info dict or a playlist info dict.
        # Raises ExtractorError when the ad snapshot cannot be found in the page.
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The ad data is embedded as JSON arguments of `s.handle(...)` calls
        post_data = [self._parse_json(j, video_id, fatal=False)
                     for j in re.findall(r's\.handle\(({.*})\);requireLazy\(', webpage)]
        data = traverse_obj(post_data, (
            ..., 'require', ..., ..., ..., 'props', 'deeplinkAdCard', 'snapshot', {dict}), get_all=False)
        if not data:
            raise ExtractorError('Unable to extract ad data')

        title = data.get('title')
        # '{{product.name}}' is an unfilled template placeholder, not a real title
        if not title or title == '{{product.name}}':
            title = join_nonempty('display_format', 'page_name', delim=' by ', from_dict=data)

        info_dict = traverse_obj(data, {
            'description': ('link_description', {str}, {lambda x: x if x != '{{product.description}}' else None}),
            'uploader': ('page_name', {str}),
            'uploader_id': ('page_id', {str_or_none}),
            'uploader_url': ('page_profile_uri', {url_or_none}),
            'timestamp': ('creation_time', {int_or_none}),
            'like_count': ('page_like_count', {int_or_none}),
        })

        # Keep every video/card that carries at least one usable format URL.
        # `.get()` is required here: indexing with `v[f]` raises KeyError on
        # the first absent key, and traverse_obj would then silently discard
        # the whole item even if another format key holds a valid URL.
        video_cards = traverse_obj(data, (
            ('videos', 'cards'),
            lambda _, v: any(url_or_none(v.get(f)) for f in self._FORMATS_MAP)))

        entries = []
        for idx, entry in enumerate(video_cards, 1):
            entries.append({
                'id': f'{video_id}_{idx}',
                'title': entry.get('title') or title,
                'description': entry.get('link_description') or info_dict.get('description'),
                'thumbnail': url_or_none(entry.get('video_preview_image_url')),
                'formats': self._extract_formats(entry),
            })

        if len(entries) == 1:
            info_dict.update(entries[0])

        elif len(entries) > 1:
            info_dict.update({
                'title': entries[0]['title'],
                'entries': entries,
                '_type': 'playlist',
            })

        # Set last so that the single entry's suffixed id does not leak out
        info_dict['id'] = video_id

        return info_dict
|
||||
|
62
yt_dlp/extractor/flextv.py
Normal file
62
yt_dlp/extractor/flextv.py
Normal file
@ -0,0 +1,62 @@
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
UserNotLive,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class FlexTVIE(InfoExtractor):
    # Extractor for live channels on flextv.co.kr
    _VALID_URL = r'https?://(?:www\.)?flextv\.co\.kr/channels/(?P<id>\d+)/live'
    _TESTS = [{
        'url': 'https://www.flextv.co.kr/channels/231638/live',
        'info_dict': {
            'id': '231638',
            'ext': 'mp4',
            'title': r're:^214하나만\.\.\. ',
            'thumbnail': r're:^https?://.+\.jpg',
            'upload_date': r're:\d{8}',
            'timestamp': int,
            'live_status': 'is_live',
            'channel': 'Hi별',
            'channel_id': '244396',
        },
        'skip': 'The channel is offline',
    }, {
        'url': 'https://www.flextv.co.kr/channels/746/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Fetches the channel's stream description from the API and returns
        # the info dict for its HLS stream.
        channel_id = self._match_id(url)
        api_url = f'https://api.flextv.co.kr/api/channels/{channel_id}/stream'

        try:
            stream_data = self._download_json(api_url, channel_id, query={'option': 'all'})
        except ExtractorError as error:
            cause = error.cause
            # Anything other than an HTTP 400 is passed on unchanged
            if not isinstance(cause, HTTPError) or cause.status != 400:
                raise
            # An HTTP 400 from the stream endpoint is reported as "not live"
            raise UserNotLive(video_id=channel_id)

        # Only the first listed source is used
        first_source = stream_data['sources'][0]
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            first_source['url'], channel_id, 'mp4')

        metadata = traverse_obj(stream_data, {
            'title': ('stream', 'title', {str}),
            'timestamp': ('stream', 'createdAt', {parse_iso8601}),
            'thumbnail': ('thumbUrl', {url_or_none}),
            'channel': ('owner', 'name', {str}),
            'channel_id': ('owner', 'id', {str_or_none}),
        })

        return {
            'id': channel_id,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
            **metadata,
        }
|
@ -11,6 +11,7 @@ from ..utils import (
|
||||
join_nonempty,
|
||||
parse_codecs,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
@ -108,6 +109,64 @@ class FloatplaneIE(InfoExtractor):
|
||||
'availability': 'subscriber_only',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.floatplane.com/post/65B5PNoBtf',
|
||||
'info_dict': {
|
||||
'id': '65B5PNoBtf',
|
||||
'description': 'I recorded the inbuilt demo mode for your 90\'s enjoyment, thanks for being Floaties!',
|
||||
'display_id': '65B5PNoBtf',
|
||||
'like_count': int,
|
||||
'release_timestamp': 1701249480,
|
||||
'uploader': 'The Trash Network',
|
||||
'availability': 'subscriber_only',
|
||||
'uploader_id': '61bc20c9a131fb692bf2a513',
|
||||
'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
|
||||
'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
|
||||
'comment_count': int,
|
||||
'title': 'The $50 electronic drum kit.',
|
||||
'channel_id': '64424fe73cd58cbcf8d8e131',
|
||||
'thumbnail': 'https://pbs.floatplane.com/blogPost_thumbnails/65B5PNoBtf/725555379422705_1701247052743.jpeg',
|
||||
'dislike_count': int,
|
||||
'channel': 'The Drum Thing',
|
||||
'release_date': '20231129',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'ISPJjexylS',
|
||||
'ext': 'mp4',
|
||||
'release_date': '20231129',
|
||||
'release_timestamp': 1701249480,
|
||||
'title': 'The $50 electronic drum kit. .mov',
|
||||
'channel_id': '64424fe73cd58cbcf8d8e131',
|
||||
'thumbnail': 'https://pbs.floatplane.com/video_thumbnails/ISPJjexylS/335202812134041_1701249383392.jpeg',
|
||||
'availability': 'subscriber_only',
|
||||
'uploader': 'The Trash Network',
|
||||
'duration': 622,
|
||||
'channel': 'The Drum Thing',
|
||||
'uploader_id': '61bc20c9a131fb692bf2a513',
|
||||
'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
|
||||
'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'qKfxu6fEpu',
|
||||
'ext': 'aac',
|
||||
'release_date': '20231129',
|
||||
'release_timestamp': 1701249480,
|
||||
'title': 'Roland TD-7 Demo.m4a',
|
||||
'channel_id': '64424fe73cd58cbcf8d8e131',
|
||||
'availability': 'subscriber_only',
|
||||
'uploader': 'The Trash Network',
|
||||
'duration': 114,
|
||||
'channel': 'The Drum Thing',
|
||||
'uploader_id': '61bc20c9a131fb692bf2a513',
|
||||
'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
|
||||
'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
|
||||
},
|
||||
}],
|
||||
'skip': 'requires subscription: "The Trash Network"',
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
@ -124,6 +183,22 @@ class FloatplaneIE(InfoExtractor):
|
||||
if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))):
|
||||
raise ExtractorError('Post does not contain a video or audio track', expected=True)
|
||||
|
||||
uploader_url = format_field(
|
||||
post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None
|
||||
|
||||
common_info = {
|
||||
'uploader_url': uploader_url,
|
||||
'channel_url': urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))),
|
||||
'availability': self._availability(needs_subscription=True),
|
||||
**traverse_obj(post_data, {
|
||||
'uploader': ('creator', 'title', {str}),
|
||||
'uploader_id': ('creator', 'id', {str}),
|
||||
'channel': ('channel', 'title', {str}),
|
||||
'channel_id': ('channel', 'id', {str}),
|
||||
'release_timestamp': ('releaseDate', {parse_iso8601}),
|
||||
}),
|
||||
}
|
||||
|
||||
items = []
|
||||
for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)):
|
||||
media_id = media['id']
|
||||
@ -150,11 +225,11 @@ class FloatplaneIE(InfoExtractor):
|
||||
formats = []
|
||||
for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)):
|
||||
url = urljoin(stream['cdn'], format_path(traverse_obj(
|
||||
stream, ('resource', 'data', 'qualityLevelParams', quality['name']))))
|
||||
stream, ('resource', 'data', 'qualityLevelParams', quality['name'], {dict}))))
|
||||
formats.append({
|
||||
**traverse_obj(quality, {
|
||||
'format_id': 'name',
|
||||
'format_note': 'label',
|
||||
'format_id': ('name', {str}),
|
||||
'format_note': ('label', {str}),
|
||||
'width': ('width', {int}),
|
||||
'height': ('height', {int}),
|
||||
}),
|
||||
@ -164,38 +239,28 @@ class FloatplaneIE(InfoExtractor):
|
||||
})
|
||||
|
||||
items.append({
|
||||
**common_info,
|
||||
'id': media_id,
|
||||
**traverse_obj(metadata, {
|
||||
'title': 'title',
|
||||
'title': ('title', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'thumbnail': ('thumbnail', 'path'),
|
||||
'thumbnail': ('thumbnail', 'path', {url_or_none}),
|
||||
}),
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
uploader_url = format_field(
|
||||
post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None
|
||||
channel_url = urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname')))
|
||||
|
||||
post_info = {
|
||||
**common_info,
|
||||
'id': post_id,
|
||||
'display_id': post_id,
|
||||
**traverse_obj(post_data, {
|
||||
'title': 'title',
|
||||
'title': ('title', {str}),
|
||||
'description': ('text', {clean_html}),
|
||||
'uploader': ('creator', 'title'),
|
||||
'uploader_id': ('creator', 'id'),
|
||||
'channel': ('channel', 'title'),
|
||||
'channel_id': ('channel', 'id'),
|
||||
'like_count': ('likes', {int_or_none}),
|
||||
'dislike_count': ('dislikes', {int_or_none}),
|
||||
'comment_count': ('comments', {int_or_none}),
|
||||
'release_timestamp': ('releaseDate', {parse_iso8601}),
|
||||
'thumbnail': ('thumbnail', 'path'),
|
||||
'thumbnail': ('thumbnail', 'path', {url_or_none}),
|
||||
}),
|
||||
'uploader_url': uploader_url,
|
||||
'channel_url': channel_url,
|
||||
'availability': self._availability(needs_subscription=True),
|
||||
}
|
||||
|
||||
if len(items) > 1:
|
||||
|
@ -1,25 +1,29 @@
|
||||
from .common import InfoExtractor
|
||||
from .nexx import NexxIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
class FunkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821',
|
||||
'md5': '8dd9d9ab59b4aa4173b3197f2ea48e81',
|
||||
'md5': '8610449476156f338761a75391b0017d',
|
||||
'info_dict': {
|
||||
'id': '1155821',
|
||||
'ext': 'mp4',
|
||||
'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2',
|
||||
'description': 'md5:a691d0413ef4835588c5b03ded670c1f',
|
||||
'description': 'md5:2a03b67596eda0d1b5125c299f45e953',
|
||||
'timestamp': 1514507395,
|
||||
'upload_date': '20171229',
|
||||
'duration': 426.0,
|
||||
'cast': ['United Creators PMB GmbH'],
|
||||
'thumbnail': 'https://assets.nexx.cloud/media/75/56/79/3YKUSJN1LACN0CRxL.jpg',
|
||||
'display_id': 'die-lustigsten-instrumente-aus-dem-internet-teil-2',
|
||||
'alt_title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet Teil 2',
|
||||
'season_number': 0,
|
||||
'season': 'Season 0',
|
||||
'episode_number': 0,
|
||||
'episode': 'Episode 0',
|
||||
},
|
||||
|
||||
}, {
|
||||
'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
|
||||
'only_matching': True,
|
||||
@ -27,18 +31,10 @@ class FunkIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id, nexx_id = self._match_valid_url(url).groups()
|
||||
video = self._download_json(
|
||||
'https://www.funk.net/api/v4.0/videos/' + nexx_id, nexx_id)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'nexx:741:' + nexx_id,
|
||||
'url': f'nexx:741:{nexx_id}',
|
||||
'ie_key': NexxIE.ie_key(),
|
||||
'id': nexx_id,
|
||||
'title': video.get('title'),
|
||||
'description': video.get('description'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'channel_id': str_or_none(video.get('channelId')),
|
||||
'display_id': display_id,
|
||||
'tags': video.get('tags'),
|
||||
'thumbnail': video.get('imageUrlLandscape'),
|
||||
}
|
||||
|
179
yt_dlp/extractor/getcourseru.py
Normal file
179
yt_dlp/extractor/getcourseru.py
Normal file
@ -0,0 +1,179 @@
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, int_or_none, url_or_none, urlencode_postdata
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class GetCourseRuPlayerIE(InfoExtractor):
    # Extractor for the signed GetCourse player page (player02.getcourse.ru),
    # which is normally reached through an <iframe> embed.
    _VALID_URL = r'https?://player02\.getcourse\.ru/sign-player/?\?(?:[^#]+&)?json=[^#&]+'
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL}[^\'"]*)']
    _TESTS = [{
        'url': 'http://player02.getcourse.ru/sign-player/?json=eyJ2aWRlb19oYXNoIjoiMTkwYmRmOTNmMWIyOTczNTMwOTg1M2E3YTE5ZTI0YjMiLCJ1c2VyX2lkIjozNTk1MjUxODMsInN1Yl9sb2dpbl91c2VyX2lkIjpudWxsLCJsZXNzb25faWQiOm51bGwsImlwIjoiNDYuMTQyLjE4Mi4yNDciLCJnY19ob3N0IjoiYWNhZGVteW1lbC5vbmxpbmUiLCJ0aW1lIjoxNzA1NDQ5NjQyLCJwYXlsb2FkIjoidV8zNTk1MjUxODMiLCJ1aV9sYW5ndWFnZSI6InJ1IiwiaXNfaGF2ZV9jdXN0b21fc3R5bGUiOnRydWV9&s=354ad2c993d95d5ac629e3133d6cefea&vh-static-feature=zigzag',
        'info_dict': {
            'id': '513573381',
            'title': '190bdf93f1b29735309853a7a19e24b3',
            'ext': 'mp4',
            'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
            'duration': 1693
        },
        'skip': 'JWT expired',
    }]

    def _real_extract(self, url):
        # The video id is not part of the URL; it is read from the
        # `window.configs` JSON object embedded in the player page.
        webpage = self._download_webpage(url, None, 'Downloading player page')
        player_config = self._search_json(
            r'window\.configs\s*=', webpage, 'config', None)

        video_id = str(player_config['gcFileId'])
        master_playlist_url = player_config['masterPlaylistUrl']
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            master_playlist_url, video_id)

        # Optional metadata first, then the mandatory fields on top of it
        info = traverse_obj(player_config, {
            'title': ('videoHash', {str}),
            'thumbnail': ('previewUrl', {url_or_none}),
            'duration': ('videoDuration', {int_or_none}),
        })
        info.update({
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        })
        return info
|
||||
|
||||
|
||||
class GetCourseRuIE(InfoExtractor):
    # Extractor for lesson/landing pages hosted on the GetCourse platform
    # (*.getcourse.ru, *.getcourse.io and the custom domains in _DOMAINS).
    # Each page is returned as a playlist of its embedded player iframes.
    _NETRC_MACHINE = 'getcourseru'
    # Custom domains known to be served by GetCourse
    _DOMAINS = [
        'academymel.online',
        'marafon.mani-beauty.com',
        'on.psbook.ru'
    ]
    # The player host (player02.*) is excluded; GetCourseRuPlayerIE handles it
    _BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
    _VALID_URL = [
        rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
        # `(?:pl/)?` is an optional non-capturing path prefix
        rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
    ]
    _TESTS = [{
        'url': 'http://academymel.online/3video_1',
        'info_dict': {
            'id': '3059742',
            'display_id': '3video_1',
            'title': 'Промоуроки Академии МЕЛ',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '513573381',
                'ext': 'mp4',
                'title': 'Промоуроки Академии МЕЛ',
                'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
                'duration': 1693
            },
        }]
    }, {
        'url': 'https://academymel.getcourse.ru/3video_1',
        'info_dict': {
            'id': '3059742',
            'display_id': '3video_1',
            'title': 'Промоуроки Академии МЕЛ',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '513573381',
                'ext': 'mp4',
                'title': 'Промоуроки Академии МЕЛ',
                'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
                'duration': 1693
            },
        }]
    }, {
        'url': 'https://academymel.getcourse.ru/pl/teach/control/lesson/view?id=319141781&editMode=0',
        'info_dict': {
            'id': '319141781',
            'title': '1. Разминка у стены',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '4919601',
                'ext': 'mp4',
                'title': '1. Разминка у стены',
                'thumbnail': 'https://preview-htz.vhcdn.com/preview/5a521788e7dc25b4f70c3dff6512d90e/preview.jpg?version=1703223532&host=vh-81',
                'duration': 704
            },
        }],
        'skip': 'paid lesson'
    }, {
        'url': 'https://manibeauty.getcourse.ru/pl/teach/control/lesson/view?id=272499894',
        'info_dict': {
            'id': '272499894',
            'title': 'Мотивация к тренировкам',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '447479687',
                'ext': 'mp4',
                'title': 'Мотивация к тренировкам',
                'thumbnail': 'https://preview-htz.vhcdn.com/preview/70ed5b9f489dd03b4aff55bfdff71a26/preview.jpg?version=1685115787&host=vh-71',
                'duration': 30
            },
        }],
        'skip': 'paid lesson'
    }, {
        'url': 'https://gaismasmandalas.getcourse.io/ATLAUTSEVBUT',
        'only_matching': True,
    }]

    _LOGIN_URL_PATH = '/cms/system/login'

    def _login(self, hostname, username, password):
        # Logs in on `hostname` by posting the site's login form.
        # Does nothing when a PHPSESSID5 cookie already exists for the host.
        if self._get_cookies(f'https://{hostname}').get('PHPSESSID5'):
            return
        login_url = f'https://{hostname}{self._LOGIN_URL_PATH}'
        # The login page carries the form id and request signature that must
        # be sent back with the credentials
        webpage = self._download_webpage(login_url, None)

        self._request_webpage(
            login_url, None, 'Logging in', 'Failed to log in',
            data=urlencode_postdata({
                'action': 'processXdget',
                'xdgetId': self._html_search_regex(
                    r'<form[^>]+\bclass="[^"]*\bstate-login[^"]*"[^>]+\bdata-xdget-id="([^"]+)"',
                    webpage, 'xdgetId'),
                'params[action]': 'login',
                'params[url]': login_url,
                'params[object_type]': 'cms_page',
                'params[object_id]': -1,
                'params[email]': username,
                'params[password]': password,
                'requestTime': int(time.time()),
                'requestSimpleSign': self._html_search_regex(
                    r'window\.requestSimpleSign\s*=\s*"([\da-f]+)"', webpage, 'simple sign'),
            }))

    def _real_extract(self, url):
        # Returns a playlist of the GetCourse player embeds found on the page.
        # Raises an expected ExtractorError when the page requires login.
        hostname = urllib.parse.urlparse(url).hostname
        # Credentials are looked up per host rather than via _NETRC_MACHINE
        username, password = self._get_login_info(netrc_machine=hostname)
        if username:
            self._login(hostname, username, password)

        display_id = self._match_id(url)
        # NB: 404 is returned due to yt-dlp not properly following redirects #9020
        webpage, urlh = self._download_webpage_handle(url, display_id, expected_status=404)
        if self._LOGIN_URL_PATH in urlh.url or urlh.status == 404:
            raise ExtractorError(
                f'This video is only available for registered users. {self._login_hint("any", netrc=hostname)}',
                expected=True)

        # Falls back to the id from the URL when the page exposes no numeric id
        playlist_id = self._search_regex(
            r'window\.(?:lessonId|gcsObjectId)\s*=\s*(\d+)', webpage, 'playlist id', default=display_id)
        # default=None lets a missing og:title fall through to <title> silently
        title = self._og_search_title(webpage, default=None) or self._html_extract_title(webpage)

        return self.playlist_from_matches(
            re.findall(GetCourseRuPlayerIE._EMBED_REGEX[0], webpage),
            playlist_id, title, display_id=display_id, ie=GetCourseRuPlayerIE, video_kwargs={
                'url_transparent': True,
                'title': title,
            })
|
@ -66,7 +66,7 @@ class GofileIE(InfoExtractor):
|
||||
query_params = {
|
||||
'contentId': file_id,
|
||||
'token': self._TOKEN,
|
||||
'websiteToken': '7fd94ds12fds4', # From https://gofile.io/dist/js/alljs.js
|
||||
'wt': '4fd6sg89d7s6', # From https://gofile.io/dist/js/alljs.js
|
||||
}
|
||||
password = self.get_param('videopassword')
|
||||
if password:
|
||||
|
@ -19,9 +19,9 @@ class GoogleDriveIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:docs|drive)\.google\.com/
|
||||
(?:docs|drive|drive\.usercontent)\.google\.com/
|
||||
(?:
|
||||
(?:uc|open)\?.*?id=|
|
||||
(?:uc|open|download)\?.*?id=|
|
||||
file/d/
|
||||
)|
|
||||
video\.google\.com/get_player\?.*?docid=
|
||||
@ -53,6 +53,9 @@ class GoogleDriveIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://drive.google.com/uc?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://drive.usercontent.google.com/download?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_FORMATS_EXT = {
|
||||
'5': 'flv',
|
||||
@ -205,9 +208,10 @@ class GoogleDriveIE(InfoExtractor):
|
||||
formats.append(f)
|
||||
|
||||
source_url = update_url_query(
|
||||
'https://drive.google.com/uc', {
|
||||
'https://drive.usercontent.google.com/download', {
|
||||
'id': video_id,
|
||||
'export': 'download',
|
||||
'confirm': 't',
|
||||
})
|
||||
|
||||
def request_source_file(source_url, kind, data=None):
|
||||
|
@ -40,6 +40,22 @@ class GoPlayIE(InfoExtractor):
|
||||
'title': 'A Family for the Holidays',
|
||||
},
|
||||
'skip': 'This video is only available for registered users'
|
||||
}, {
|
||||
'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
|
||||
'info_dict': {
|
||||
'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656',
|
||||
'ext': 'mp4',
|
||||
'title': 'S11 - Aflevering 1',
|
||||
'episode': 'Episode 1',
|
||||
'series': 'De Mol',
|
||||
'season_number': 11,
|
||||
'episode_number': 1,
|
||||
'season': 'Season 11'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True
|
||||
},
|
||||
'skip': 'This video is only available for registered users'
|
||||
}]
|
||||
|
||||
_id_token = None
|
||||
@ -77,16 +93,39 @@ class GoPlayIE(InfoExtractor):
|
||||
|
||||
api = self._download_json(
|
||||
f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',
|
||||
video_id, headers={'Authorization': 'Bearer %s' % self._id_token})
|
||||
video_id, headers={
|
||||
'Authorization': 'Bearer %s' % self._id_token,
|
||||
**self.geo_verification_headers(),
|
||||
})
|
||||
|
||||
formats, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS')
|
||||
if 'manifestUrls' in api:
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS')
|
||||
|
||||
else:
|
||||
if 'ssai' not in api:
|
||||
raise ExtractorError('expecting Google SSAI stream')
|
||||
|
||||
ssai_content_source_id = api['ssai']['contentSourceID']
|
||||
ssai_video_id = api['ssai']['videoID']
|
||||
|
||||
dai = self._download_json(
|
||||
f'https://dai.google.com/ondemand/dash/content/{ssai_content_source_id}/vid/{ssai_video_id}/streams',
|
||||
video_id, data=b'{"api-key":"null"}',
|
||||
headers={'content-type': 'application/json'})
|
||||
|
||||
periods = self._extract_mpd_periods(dai['stream_manifest'], video_id)
|
||||
|
||||
# skip pre-roll and mid-roll ads
|
||||
periods = [p for p in periods if '-ad-' not in p['id']]
|
||||
|
||||
formats, subtitles = self._merge_mpd_periods(periods)
|
||||
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
return info_dict
|
||||
|
||||
|
||||
|
@ -57,8 +57,8 @@ class GoProIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
metadata = self._parse_json(
|
||||
self._html_search_regex(r'window\.__reflectData\s*=\s*([^;]+)', webpage, 'metadata'), video_id)
|
||||
metadata = self._search_json(
|
||||
r'window\.__reflectData\s*=', webpage, 'metadata', video_id)
|
||||
|
||||
video_info = metadata['collectionMedia'][0]
|
||||
media_data = self._download_json(
|
||||
@ -99,7 +99,7 @@ class GoProIE(InfoExtractor):
|
||||
'duration': int_or_none(
|
||||
video_info.get('source_duration')),
|
||||
'artist': str_or_none(
|
||||
video_info.get('music_track_artist')),
|
||||
video_info.get('music_track_artist')) or None,
|
||||
'track': str_or_none(
|
||||
video_info.get('music_track_name')),
|
||||
video_info.get('music_track_name')) or None,
|
||||
}
|
||||
|
69
yt_dlp/extractor/ilpost.py
Normal file
69
yt_dlp/extractor/ilpost.py
Normal file
@ -0,0 +1,69 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class IlPostIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ilpost\.it/episodes/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ilpost.it/episodes/1-avis-akvasas-ka/',
|
||||
'md5': '43649f002d85e1c2f319bb478d479c40',
|
||||
'info_dict': {
|
||||
'id': '2972047',
|
||||
'ext': 'mp3',
|
||||
'display_id': '1-avis-akvasas-ka',
|
||||
'title': '1. Avis akvasas ka',
|
||||
'url': 'https://www.ilpost.it/wp-content/uploads/2023/12/28/1703781217-l-invasione-pt1-v6.mp3',
|
||||
'timestamp': 1703835014,
|
||||
'upload_date': '20231229',
|
||||
'duration': 2495.0,
|
||||
'availability': 'public',
|
||||
'series_id': '235598',
|
||||
'description': '',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Extract a single podcast episode from an ilpost.it episode page.

    The page is downloaded, the ``ilpostpodcast`` JS variable is parsed, and
    its values are used to POST a ``checkpodcast`` request to the AJAX URL it
    names. The matching entry of ``data.postcastList`` supplies the metadata.

    Raises:
        ExtractorError: if no list entry has an id equal to the page's post id.
    """
    display_id = self._match_id(url)
    page = self._download_webpage(url, display_id)

    # JSON object assigned to `var ilpostpodcast` in the page source
    player_vars = self._search_json(
        r'var\s+ilpostpodcast\s*=', page, 'metadata', display_id)
    episode_id = player_vars['post_id']
    podcast_id = player_vars['podcast_id']

    ajax_url = player_vars['ajax_url']
    form = {
        'action': 'checkpodcast',
        'cookie': player_vars['cookie'],
        'post_id': episode_id,
        'podcast_id': podcast_id,
    }
    listing = self._download_json(
        ajax_url, display_id, data=urlencode_postdata(form))

    def is_requested_episode(_, item):
        # List ids are stringified before being compared with the page's post id
        return str(item['id']) == episode_id

    episode = traverse_obj(
        listing, ('data', 'postcastList', is_requested_episode, {dict}), get_all=False)
    if not episode:
        raise ExtractorError('Episode could not be extracted')

    info = {
        'id': episode_id,
        'display_id': display_id,
        'series_id': podcast_id,
        'vcodec': 'none',
    }
    info.update(traverse_obj(episode, {
        'title': ('title', {str}),
        'description': ('description', {str}),
        'url': ('podcast_raw_url', {url_or_none}),
        'thumbnail': ('image', {url_or_none}),
        'timestamp': ('timestamp', {int_or_none}),
        # The API value is in milliseconds; scale=1000 converts it to seconds
        'duration': ('milliseconds', {functools.partial(float_or_none, scale=1000)}),
        'availability': ('free', {lambda v: 'public' if v else 'subscriber_only'}),
    }))
    return info
|
@ -1,5 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
@ -20,39 +21,64 @@ class JioSaavnSongIE(JioSaavnBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:jiosaavn\.com/song/[^/?#]+/|saavn\.com/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk',
|
||||
'md5': '7b1f70de088ede3a152ea34aece4df42',
|
||||
'md5': '3b84396d15ed9e083c3106f1fa589c04',
|
||||
'info_dict': {
|
||||
'id': 'OQsEfQFVUXk',
|
||||
'ext': 'mp3',
|
||||
'ext': 'mp4',
|
||||
'title': 'Leja Re',
|
||||
'album': 'Leja Re',
|
||||
'thumbnail': 'https://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
|
||||
'duration': 205,
|
||||
'view_count': int,
|
||||
'release_year': 2018,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.saavn.com/s/song/hindi/Saathiya/O-Humdum-Suniyo-Re/KAMiazoCblU',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_VALID_BITRATES = ('16', '32', '64', '128', '320')
|
||||
|
||||
def _real_extract(self, url):
|
||||
audio_id = self._match_id(url)
|
||||
extract_bitrates = self._configuration_arg('bitrate', ['128', '320'], ie_key='JioSaavn')
|
||||
if invalid_bitrates := [br for br in extract_bitrates if br not in self._VALID_BITRATES]:
|
||||
raise ValueError(
|
||||
f'Invalid bitrate(s): {", ".join(invalid_bitrates)}. '
|
||||
+ f'Valid bitrates are: {", ".join(self._VALID_BITRATES)}')
|
||||
|
||||
song_data = self._extract_initial_data(url, audio_id)['song']['song']
|
||||
media_data = self._download_json(
|
||||
'https://www.jiosaavn.com/api.php', audio_id, data=urlencode_postdata({
|
||||
'__call': 'song.generateAuthToken',
|
||||
'_format': 'json',
|
||||
'bitrate': '128',
|
||||
'url': song_data['encrypted_media_url'],
|
||||
}))
|
||||
formats = []
|
||||
for bitrate in extract_bitrates:
|
||||
media_data = self._download_json(
|
||||
'https://www.jiosaavn.com/api.php', audio_id, f'Downloading format info for {bitrate}',
|
||||
fatal=False, data=urlencode_postdata({
|
||||
'__call': 'song.generateAuthToken',
|
||||
'_format': 'json',
|
||||
'bitrate': bitrate,
|
||||
'url': song_data['encrypted_media_url'],
|
||||
}))
|
||||
if not media_data.get('auth_url'):
|
||||
self.report_warning(f'Unable to extract format info for {bitrate}')
|
||||
continue
|
||||
formats.append({
|
||||
'url': media_data['auth_url'],
|
||||
'ext': media_data.get('type'),
|
||||
'format_id': bitrate,
|
||||
'abr': int(bitrate),
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
'url': media_data['auth_url'],
|
||||
'ext': media_data.get('type'),
|
||||
'vcodec': 'none',
|
||||
'formats': formats,
|
||||
**traverse_obj(song_data, {
|
||||
'title': ('title', 'text'),
|
||||
'album': ('album', 'text'),
|
||||
'thumbnail': ('image', 0, {url_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'view_count': ('play_count', {int_or_none}),
|
||||
'release_year': ('year', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
140
yt_dlp/extractor/kukululive.py
Normal file
140
yt_dlp/extractor/kukululive.py
Normal file
@ -0,0 +1,140 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
filter_dict,
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
qualities,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class KukuluLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://live\.erinn\.biz/live\.php\?h(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://live.erinn.biz/live.php?h675134569',
|
||||
'md5': 'e380fa6a47fc703d91cea913ab44ec2e',
|
||||
'info_dict': {
|
||||
'id': '675134569',
|
||||
'ext': 'mp4',
|
||||
'title': 'プロセカ',
|
||||
'description': 'テストも兼ねたプロセカ配信。',
|
||||
'timestamp': 1702689148,
|
||||
'upload_date': '20231216',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://live.erinn.biz/live.php?h102338092',
|
||||
'md5': 'dcf5167a934b1c60333461e13a81a6e2',
|
||||
'info_dict': {
|
||||
'id': '102338092',
|
||||
'ext': 'mp4',
|
||||
'title': 'Among Usで遊びます!!',
|
||||
'description': 'VTuberになりましたねんねこ㌨ですよろしくお願いします',
|
||||
'timestamp': 1704603118,
|
||||
'upload_date': '20240107',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://live.erinn.biz/live.php?h878049531',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _get_quality_meta(self, video_id, desc, code, force_h264=None):
    """Download the player metadata for one quality variant of a live stream.

    Args:
        video_id: stream hash, sent as the ``hash`` query parameter.
        desc: human-readable quality name, used only in progress/error notes.
        code: inserted into the ``get{code}liveByAjax`` action name.
        force_h264: optional value for the ``force_h264`` query parameter.

    Returns:
        The response body parsed with ``urllib.parse.parse_qs`` (a dict
        mapping each key to a list of values).
    """
    label = f'{desc} (force_h264)' if force_h264 else desc
    # NOTE(review): filter_dict presumably drops the unset force_h264 entry - confirm
    params = filter_dict({
        'hash': video_id,
        'action': f'get{code}liveByAjax',
        'force_h264': force_h264,
    })
    response = self._download_webpage(
        'https://live.erinn.biz/live.player.fplayer.php', video_id,
        f'Downloading {label} quality metadata', f'Unable to download {label} quality metadata',
        query=params)
    return urllib.parse.parse_qs(response)
|
||||
|
||||
def _add_quality_formats(self, formats, quality_meta):
    """Append the formats described by one quality-metadata dict to *formats*.

    Args:
        formats: list that is extended in place.
        quality_meta: parse_qs-style dict (each value is a list) as returned
            by ``_get_quality_meta``.

    A video format is added when ``hlsaddr`` holds a valid URL and an
    audio-only format when ``hlsaddr_audioonly`` does. Both share the same
    ``quality`` ranking derived from ``now_quality``.
    """
    vcodec = traverse_obj(quality_meta, ('vcodec', 0, {str}))
    quality = traverse_obj(quality_meta, ('now_quality', 0, {str}))
    quality_priority = qualities(('low', 'h264', 'high'))(quality)

    # (metadata key, format_id, container extension, vcodec)
    variants = (
        ('hlsaddr', quality, 'mp4', vcodec),
        ('hlsaddr_audioonly', join_nonempty(quality, 'audioonly'), 'm4a', 'none'),
    )
    for key, format_id, ext, codec in variants:
        # url_or_none is used only as a validity check; the raw value is stored
        if not traverse_obj(quality_meta, (key, 0, {url_or_none})):
            continue
        formats.append({
            'format_id': format_id,
            'url': quality_meta[key][0],
            'ext': ext,
            'vcodec': codec,
            'quality': quality_priority,
        })
|
||||
|
||||
def _real_extract(self, url):
    """Extract a live stream or its time-shifted recording from live.erinn.biz.

    If the page contains ``var timeshift = false`` the stream is treated as
    live and its formats come from the quality-metadata endpoint. Otherwise
    the time-shift player page is downloaded and each entry of its
    ``fplayer_source`` array becomes one segment; a single segment is
    returned as a plain video, several as a multi-video playlist.

    Raises:
        ExtractorError: (expected) when the page reports that the time-shift
            recording could not be found.
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)

    if '>タイムシフトが見つかりませんでした。<' in webpage:
        raise ExtractorError('This stream has expired', expected=True)

    # Upper-case SPAN tags are lower-cased before the title element is looked up
    normalized = webpage.replace('<SPAN', '<span').replace('SPAN>', 'span>')
    title = clean_html(get_element_by_id('livetitle', normalized))
    description = self._html_search_meta('Description', webpage)
    thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)

    is_live = self._search_regex(
        r'(var\s+timeshift\s*=\s*false)', webpage, 'is livestream', default=False)
    if is_live:
        formats = []
        for desc, code in (('high', 'Z'), ('low', 'ForceLow')):
            meta = self._get_quality_meta(video_id, desc, code)
            self._add_quality_formats(formats, meta)
            if desc != 'high' or traverse_obj(meta, ('vcodec', 0)) != 'HEVC':
                continue
            # The high variant is HEVC: request it again with force_h264 set
            h264_meta = self._get_quality_meta(video_id, desc, code, force_h264='1')
            self._add_quality_formats(formats, h264_meta)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'is_live': True,
            'formats': formats,
        }

    # VOD extraction
    player_html = self._download_webpage(
        'https://live.erinn.biz/live.timeshift.fplayer.php', video_id,
        'Downloading player html', 'Unable to download player html', query={'hash': video_id})

    stream_data = self._search_json(
        r'var\s+fplayer_source\s*=', player_html, 'stream data', video_id,
        contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json)
    # Keep only the items that carry a truthy 'file' value
    sources = traverse_obj(stream_data, lambda _, v: v['file'])

    def build_entries(segments, playlist=True):
        for index, segment in enumerate(segments, 1):
            entry_id, entry_title = video_id, title
            if playlist:
                entry_id = f'{video_id}_{index}'
                entry_title = f'{title} (Part {index})'
            yield {
                'id': entry_id,
                'title': entry_title,
                'description': description,
                'timestamp': traverse_obj(segment, ('time_start', {int_or_none})),
                'thumbnail': thumbnail,
                'formats': [{
                    'url': urljoin('https://live.erinn.biz', segment['file']),
                    'ext': 'mp4',
                    'protocol': 'm3u8_native',
                }],
            }

    if len(sources) == 1:
        return next(build_entries(sources, playlist=False))

    return self.playlist_result(
        build_entries(sources), video_id, title, description, multi_video=True)
|
@ -13,7 +13,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://video.lefigaro.fr/embed/figaro/video/les-francais-ne-veulent-ils-plus-travailler-suivez-en-direct-le-club-le-figaro-idees/',
|
||||
'md5': 'e94de44cd80818084352fcf8de1ce82c',
|
||||
'md5': 'a0c3069b7e4c4526abf0053a7713f56f',
|
||||
'info_dict': {
|
||||
'id': 'g9j7Eovo',
|
||||
'title': 'Les Français ne veulent-ils plus travailler ? Retrouvez Le Club Le Figaro Idées',
|
||||
@ -26,7 +26,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.lefigaro.fr/embed/figaro/video/intelligence-artificielle-faut-il-sen-mefier/',
|
||||
'md5': '0b3f10332b812034b3a3eda1ef877c5f',
|
||||
'md5': '319c662943dd777bab835cae1e2d73a5',
|
||||
'info_dict': {
|
||||
'id': 'LeAgybyc',
|
||||
'title': 'Intelligence artificielle : faut-il s’en méfier ?',
|
||||
@ -41,7 +41,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
|
||||
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://video.lefigaro.fr/figaro/video/suivez-en-direct-le-club-le-figaro-international-avec-philippe-gelie-9/',
|
||||
'md5': '3972ddf2d5f8b98699f191687258e2f9',
|
||||
'md5': '6289f9489efb969e38245f31721596fe',
|
||||
'info_dict': {
|
||||
'id': 'QChnbPYA',
|
||||
'title': 'Où en est le couple franco-allemand ? Retrouvez Le Club Le Figaro International',
|
||||
@ -55,7 +55,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.lefigaro.fr/figaro/video/la-philosophe-nathalie-sarthou-lajus-est-linvitee-du-figaro-live/',
|
||||
'md5': '3ac0a0769546ee6be41ab52caea5d9a9',
|
||||
'md5': 'f6df814cae53e85937621599d2967520',
|
||||
'info_dict': {
|
||||
'id': 'QJzqoNbf',
|
||||
'title': 'La philosophe Nathalie Sarthou-Lajus est l’invitée du Figaro Live',
|
||||
@ -73,7 +73,8 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
player_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['pageData']['playerData']
|
||||
player_data = self._search_nextjs_data(
|
||||
webpage, display_id)['props']['pageProps']['initialProps']['pageData']['playerData']
|
||||
|
||||
return self.url_result(
|
||||
f'jwplatform:{player_data["videoId"]}', title=player_data.get('title'),
|
||||
|
@ -3,16 +3,15 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
srt_subtitles_timecode,
|
||||
strip_or_none,
|
||||
mimetype2ext,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
@ -83,15 +82,29 @@ class LinkedInLearningBaseIE(LinkedInBaseIE):
|
||||
|
||||
|
||||
class LinkedInIE(LinkedInBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/.+?(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20',
|
||||
'info_dict': {
|
||||
'id': '6850898786781339649',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mishal K. on LinkedIn: #sendinblueviews #toronto #digitalmarketing',
|
||||
'description': 'md5:be125430bab1c574f16aeb186a4d5b19',
|
||||
'creator': 'Mishal K.'
|
||||
'title': 'Mishal K. on LinkedIn: #sendinblueviews #toronto #digitalmarketing #nowhiring #sendinblue…',
|
||||
'description': 'md5:2998a31f6f479376dd62831f53a80f71',
|
||||
'uploader': 'Mishal K.',
|
||||
'thumbnail': 're:^https?://media.licdn.com/dms/image/.*$',
|
||||
'like_count': int
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.linkedin.com/posts/the-mathworks_2_what-is-mathworks-cloud-center-activity-7151241570371948544-4Gu7',
|
||||
'info_dict': {
|
||||
'id': '7151241570371948544',
|
||||
'ext': 'mp4',
|
||||
'title': 'MathWorks on LinkedIn: What Is MathWorks Cloud Center?',
|
||||
'description': 'md5:95f9d4eeb6337882fb47eefe13d7a40c',
|
||||
'uploader': 'MathWorks',
|
||||
'thumbnail': 're:^https?://media.licdn.com/dms/image/.*$',
|
||||
'like_count': int,
|
||||
'subtitles': 'mincount:1'
|
||||
},
|
||||
}]
|
||||
|
||||
@ -99,26 +112,30 @@ class LinkedInIE(LinkedInBaseIE):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_extract_title(webpage)
|
||||
description = clean_html(get_element_by_class('share-update-card__update-text', webpage))
|
||||
like_count = int_or_none(get_element_by_class('social-counts-reactions__social-counts-numRections', webpage))
|
||||
creator = strip_or_none(clean_html(get_element_by_class('comment__actor-name', webpage)))
|
||||
|
||||
sources = self._parse_json(extract_attributes(self._search_regex(r'(<video[^>]+>)', webpage, 'video'))['data-sources'], video_id)
|
||||
video_attrs = extract_attributes(self._search_regex(r'(<video[^>]+>)', webpage, 'video'))
|
||||
sources = self._parse_json(video_attrs['data-sources'], video_id)
|
||||
formats = [{
|
||||
'url': source['src'],
|
||||
'ext': mimetype2ext(source.get('type')),
|
||||
'tbr': float_or_none(source.get('data-bitrate'), scale=1000),
|
||||
} for source in sources]
|
||||
subtitles = {'en': [{
|
||||
'url': video_attrs['data-captions-url'],
|
||||
'ext': 'vtt',
|
||||
}]} if url_or_none(video_attrs.get('data-captions-url')) else {}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'like_count': like_count,
|
||||
'creator': creator,
|
||||
'title': self._og_search_title(webpage, default=None) or self._html_extract_title(webpage),
|
||||
'like_count': int_or_none(self._search_regex(
|
||||
r'\bdata-num-reactions="(\d+)"', webpage, 'reactions', default=None)),
|
||||
'uploader': traverse_obj(
|
||||
self._yield_json_ld(webpage, video_id),
|
||||
(lambda _, v: v['@type'] == 'SocialMediaPosting', 'author', 'name', {str}), get_all=False),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': description,
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
|
282
yt_dlp/extractor/lsm.py
Normal file
282
yt_dlp/extractor/lsm.py
Normal file
@ -0,0 +1,282 @@
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class LSMLREmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:
|
||||
(?:latvijasradio|lr1|lr2|klasika|lr4|naba|radioteatris)\.lsm|
|
||||
pieci
|
||||
)\.lv/[^/?#]+/(?:
|
||||
pleijeris|embed
|
||||
)/?\?(?:[^#]+&)?(?:show|id)=(?P<id>\d+)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://latvijasradio.lsm.lv/lv/embed/?theme=black&size=16x9&showCaptions=0&id=183522',
|
||||
'md5': '719b33875cd1429846eeeaeec6df2830',
|
||||
'info_dict': {
|
||||
'id': 'a342781',
|
||||
'ext': 'mp3',
|
||||
'duration': 1823,
|
||||
'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām',
|
||||
'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/gallery_fd4675ac.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://radioteatris.lsm.lv/lv/embed/?id=&show=1270&theme=white&size=16x9',
|
||||
'info_dict': {
|
||||
'id': '1270',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
'playlist': [{
|
||||
'md5': '2e61b6eceff00d14d57fdbbe6ab24cac',
|
||||
'info_dict': {
|
||||
'id': 'a297397',
|
||||
'ext': 'mp3',
|
||||
'title': 'Eriks Emanuels Šmits "Pilāta evaņģēlijs". 1. daļa',
|
||||
'thumbnail': 'https://radioteatris.lsm.lv/public/assets/shows/62f131ae81e3c.jpg',
|
||||
'duration': 3300,
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'url': 'https://radioteatris.lsm.lv/lv/embed/?id=&show=1269&theme=white&size=16x9',
|
||||
'md5': '24810d4a961da2295d9860afdcaf4f5a',
|
||||
'info_dict': {
|
||||
'id': 'a230690',
|
||||
'ext': 'mp3',
|
||||
'title': 'Jens Ahlboms "Spārni". Radioizrāde ar Mārtiņa Freimaņa mūziku',
|
||||
'thumbnail': 'https://radioteatris.lsm.lv/public/assets/shows/62f13023a457c.jpg',
|
||||
'duration': 1788,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://lr1.lsm.lv/lv/embed/?id=166557&show=0&theme=white&size=16x9',
|
||||
'info_dict': {
|
||||
'id': '166557',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'md5': '6a8b0927572f443f09c6e50a3ad65f2d',
|
||||
'info_dict': {
|
||||
'id': 'a303104',
|
||||
'ext': 'mp3',
|
||||
'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/c/5/gallery_a83ad2c2.jpg',
|
||||
'title': 'Krustpunktā Lielā intervija: Valsts prezidents Egils Levits',
|
||||
'duration': 3222,
|
||||
},
|
||||
}, {
|
||||
'md5': '5d5e191e718b7644e5118b7b4e093a6d',
|
||||
'info_dict': {
|
||||
'id': 'v303104',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/c/5/gallery_a83ad2c2.jpg',
|
||||
'title': 'Krustpunktā Lielā intervija: Valsts prezidents Egils Levits - Video Version',
|
||||
'duration': 3222,
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'url': 'https://lr1.lsm.lv/lv/embed/?id=183522&show=0&theme=white&size=16x9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://lr2.lsm.lv/lv/embed/?id=182126&show=0&theme=white&size=16x9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://klasika.lsm.lv/lv/embed/?id=110806&show=0&theme=white&size=16x9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://lr4.lsm.lv/lv/embed/?id=184282&show=0&theme=white&size=16x9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://pieci.lv/lv/embed/?id=168896&show=0&theme=white&size=16x9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://naba.lsm.lv/lv/embed/?id=182901&show=0&theme=white&size=16x9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://radioteatris.lsm.lv/lv/embed/?id=176439&show=0&theme=white&size=16x9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://lr1.lsm.lv/lv/pleijeris/?embed=0&id=48205&time=00%3A00&idx=0',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Extract the audio/video items of a Latvijas Radio embed player page.

    The id is the first non-zero numeric value among the ``show`` and ``id``
    query parameters (checked in that order). The two JSON arguments of the
    page's ``LR.audio.Player(...)`` call supply the player settings (poster)
    and the media items. One item is returned as a single video, several as
    a playlist.
    """
    query = parse_qs(url)
    # `x or None` discards a value of 0, so e.g. show=0 falls through to `id`
    video_id = traverse_obj(query, (
        ('show', 'id'), 0, {int_or_none}, {lambda x: x or None}, {str_or_none}), get_all=False)
    webpage = self._download_webpage(url, video_id)

    player_data, media_data = self._search_regex(
        r'LR\.audio\.Player\s*\([^{]*(?P<player>\{.*?\}),(?P<media>\{.*\})\);',
        webpage, 'player json', group=('player', 'media'))

    # Player settings are optional (non-fatal); the media description is required
    player_json = self._parse_json(
        player_data, video_id, transform_source=js_to_json, fatal=False) or {}
    media_json = self._parse_json(media_data, video_id, transform_source=js_to_json)

    thumbnail = urljoin(url, player_json.get('poster'))

    def collect_formats(media_item):
        found = []
        for source_url in traverse_obj(media_item, ('sources', ..., 'file', {url_or_none})):
            if determine_ext(source_url) != 'm3u8':
                found.append({'url': source_url})
                continue
            found.extend(self._extract_m3u8_formats(source_url, video_id, fatal=False))
        return found

    entries = []
    for item in traverse_obj(media_json, (('audio', 'video'), lambda _, v: v['id'])):
        formats = collect_formats(item)

        id_ = item['id']
        title = item.get('title')
        if not title and id_.startswith('v'):
            # An untitled 'v…' item borrows the title of the audio item whose id
            # matches after the first character
            title = traverse_obj(
                media_json, ('audio', lambda _, v: v['id'][1:] == id_[1:], 'title',
                             {lambda x: x and f'{x} - Video Version'}), get_all=False)

        entries.append({
            'formats': formats,
            'thumbnail': thumbnail,
            'id': id_,
            'title': title,
            'duration': traverse_obj(item, ('duration', {int_or_none})),
        })

    if len(entries) == 1:
        return entries[0]

    return self.playlist_result(entries, video_id)
|
||||
|
||||
|
||||
class LSMLTVEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://ltv\.lsm\.lv/embed\?(?:[^#]+&)?c=(?P<id>[^#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://ltv.lsm.lv/embed?c=eyJpdiI6IjQzbHVUeHAyaDJiamFjcjdSUUFKdnc9PSIsInZhbHVlIjoiMHl3SnJNRmd2TmFIdnZwOGtGUUpzODFzUEZ4SVVsN2xoRjliSW9vckUyMWZIWG8vbWVzaFFkY0lhNmRjbjRpaCIsIm1hYyI6ImMzNjdhMzFhNTFhZmY1ZmE0NWI5YmFjZGI1YmJiNGEyNjgzNDM4MjUzMWEwM2FmMDMyZDMwYWM1MDFjZmM5MGIiLCJ0YWciOiIifQ==',
|
||||
'md5': '64f72a360ca530d5ed89c77646c9eee5',
|
||||
'info_dict': {
|
||||
'id': '46k_d23-6000-105',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1700589151,
|
||||
'duration': 1442,
|
||||
'upload_date': '20231121',
|
||||
'title': 'D23-6000-105_cetstud',
|
||||
'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://ltv.lsm.lv/embed?enablesdkjs=1&c=eyJpdiI6IncwVzZmUFk2MU12enVWK1I3SUcwQ1E9PSIsInZhbHVlIjoid3FhV29vamc3T2sxL1RaRmJ5Rm1GTXozU0o2dVczdUtLK0cwZEZJMDQ2a3ZIRG5DK2pneGlnbktBQy9uazVleHN6VXhxdWIweWNvcHRDSnlISlNYOHlVZ1lpcTUrcWZSTUZPQW14TVdkMW9aOUtRWVNDcFF4eWpHNGcrT0VZbUNFQStKQk91cGpndW9FVjJIa0lpbkh3PT0iLCJtYWMiOiIyZGI1NDJlMWRlM2QyMGNhOGEwYTM2MmNlN2JlOGRhY2QyYjdkMmEzN2RlOTEzYTVkNzI1ODlhZDlhZjU4MjQ2IiwidGFnIjoiIn0=',
|
||||
'md5': 'a1711e190fe680fdb68fd8413b378e87',
|
||||
'info_dict': {
|
||||
'id': 'wUnFArIPDSY',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'LTV_16plus',
|
||||
'release_date': '20220514',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCNMrnafwXD2XKeeQOyfkFCw',
|
||||
'view_count': int,
|
||||
'availability': 'public',
|
||||
'thumbnail': 'https://i.ytimg.com/vi/wUnFArIPDSY/maxresdefault.jpg',
|
||||
'release_timestamp': 1652544074,
|
||||
'title': 'EIROVĪZIJA SALĀTOS',
|
||||
'live_status': 'was_live',
|
||||
'uploader_id': '@LTV16plus',
|
||||
'comment_count': int,
|
||||
'channel_id': 'UCNMrnafwXD2XKeeQOyfkFCw',
|
||||
'channel_follower_count': int,
|
||||
'categories': ['Entertainment'],
|
||||
'duration': 5269,
|
||||
'upload_date': '20220514',
|
||||
'age_limit': 0,
|
||||
'channel': 'LTV_16plus',
|
||||
'playable_in_embed': True,
|
||||
'tags': [],
|
||||
'uploader_url': 'https://www.youtube.com/@LTV16plus',
|
||||
'like_count': int,
|
||||
'description': 'md5:7ff0c42ba971e3c13e4b8a2ff03b70b5',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Resolve an ltv.lsm.lv embed page to the extractor hosting the media.

    The page's ``window.ltvEmbedPayload`` JSON names the source type:
    ``telia`` is delegated to ``CloudyCDN`` using the source's embed URL and
    ``youtube`` to ``Youtube`` using the source's id.

    Raises:
        ExtractorError: if the source type is neither of the two above.
    """
    video_id = urllib.parse.unquote(self._match_id(url))
    webpage = self._download_webpage(url, video_id)
    data = self._search_json(
        r'window\.ltvEmbedPayload\s*=', webpage, 'embed json', video_id)
    embed_type = traverse_obj(data, ('source', 'name', {str}))

    # source name -> (extractor key, path of the value handed to that extractor)
    handlers = {
        'telia': ('CloudyCDN', ('source', 'embed_url', {url_or_none})),
        'youtube': ('Youtube', ('source', 'id', {str})),
    }
    if embed_type not in handlers:
        raise ExtractorError(f'Unsupported embed type {embed_type!r}')

    ie_key, target_path = handlers[embed_type]
    embed_url = traverse_obj(data, target_path)

    metadata = traverse_obj(data, {
        'title': ('parentInfo', 'title'),
        'duration': ('parentInfo', 'duration', {int_or_none}),
        'thumbnail': ('source', 'poster', {url_or_none}),
    })
    return self.url_result(embed_url, ie_key, video_id, **metadata)
|
||||
|
||||
|
||||
class LSMReplayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://replay\.lsm\.lv/[^/?#]+/(?:ieraksts|statja)/[^/?#]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://replay.lsm.lv/lv/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
|
||||
'md5': '64f72a360ca530d5ed89c77646c9eee5',
|
||||
'info_dict': {
|
||||
'id': '46k_d23-6000-105',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1700586300,
|
||||
'description': 'md5:0f1b14798cc39e1ae578bd0eb268f759',
|
||||
'duration': 1442,
|
||||
'upload_date': '20231121',
|
||||
'title': '4. studija. Zolitūdes traģēdija un Inčupes stacija',
|
||||
'thumbnail': 'https://ltv.lsm.lv/storage/media/8/7/large/5/1f9604e1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://replay.lsm.lv/lv/ieraksts/lr/183522/138-nepilniga-kompensejamo-zalu-sistema-pat-menesiem-dzena-pacientus-pa-aptiekam',
|
||||
'md5': '719b33875cd1429846eeeaeec6df2830',
|
||||
'info_dict': {
|
||||
'id': 'a342781',
|
||||
'ext': 'mp3',
|
||||
'duration': 1823,
|
||||
'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām',
|
||||
'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/large_fd4675ac.jpg',
|
||||
'upload_date': '20231102',
|
||||
'timestamp': 1698921060,
|
||||
'description': 'md5:7bac3b2dd41e44325032943251c357b1',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://replay.lsm.lv/ru/statja/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _fix_nuxt_data(self, webpage):
|
||||
return re.sub(r'Object\.create\(null(?:,(\{.+\}))?\)', lambda m: m.group(1) or 'null', webpage)
|
||||
|
||||
def _real_extract(self, url):
    """Extract a replay.lsm.lv item as a ``url_transparent`` result.

    The page is passed through ``_fix_nuxt_data`` before its ``__REPLAY__``
    Nuxt payload is parsed. The playback service URL becomes the result's
    ``url`` and the ``mediaItem`` fields supply the metadata.
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)

    patched_page = self._fix_nuxt_data(webpage)
    data = self._search_nuxt_data(patched_page, video_id, context_name='__REPLAY__')

    info = {
        '_type': 'url_transparent',
        'id': video_id,
    }
    info.update(traverse_obj(data, {
        'url': ('playback', 'service', 'url', {url_or_none}),
        'title': ('mediaItem', 'title'),
        # get_all=False: the first of 'lead' / 'body' that yields a value is used
        'description': ('mediaItem', ('lead', 'body')),
        'duration': ('mediaItem', 'duration', {int_or_none}),
        'timestamp': ('mediaItem', 'aired_at', {parse_iso8601}),
        'thumbnail': ('mediaItem', 'largeThumbnail', {url_or_none}),
    }, get_all=False))
    return info
|
@ -28,12 +28,24 @@ class MagellanTVIE(InfoExtractor):
|
||||
'tags': ['Investigation', 'True Crime', 'Justice', 'Europe'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.magellantv.com/watch/celebration-nation',
|
||||
'info_dict': {
|
||||
'id': 'celebration-nation',
|
||||
'ext': 'mp4',
|
||||
'tags': ['Art & Culture', 'Human Interest', 'Anthropology', 'China', 'History'],
|
||||
'duration': 2640.0,
|
||||
'title': 'Ancestors',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['reactContext']['video']['detail']
|
||||
data = traverse_obj(self._search_nextjs_data(webpage, video_id), (
|
||||
'props', 'pageProps', 'reactContext',
|
||||
(('video', 'detail'), ('series', 'currentEpisode')), {dict}), get_all=False)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(data['jwpVideoUrl'], video_id)
|
||||
|
||||
return {
|
||||
|
62
yt_dlp/extractor/magentamusik.py
Normal file
62
yt_dlp/extractor/magentamusik.py
Normal file
@ -0,0 +1,62 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, int_or_none, join_nonempty, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class MagentaMusikIE(InfoExtractor):
    # The whole last path segment is captured as the (display) id
    _VALID_URL = r'https?://(?:www\.)?magentamusik\.de/(?P<id>[^/?#]+)'

    _TESTS = [{
        'url': 'https://www.magentamusik.de/marty-friedman-woa-2023-9208205928595409235',
        'md5': 'd82dd4748f55fc91957094546aaf8584',
        'info_dict': {
            'id': '9208205928595409235',
            'display_id': 'marty-friedman-woa-2023-9208205928595409235',
            'ext': 'mp4',
            'title': 'Marty Friedman: W:O:A 2023',
            'alt_title': 'Konzert vom: 05.08.2023 13:00',
            'duration': 2760,
            'categories': ['Musikkonzert'],
            'release_year': 2023,
            'location': 'Deutschland',
        }
    }]

    def _real_extract(self, url):
        """Resolve the page's player config to an asset, then to SMIL formats and metadata."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # A page without the player config element is treated as having no video
        player_config = self._search_json(
            r'data-js-element="o-video-player__config">', webpage, 'player config', display_id, fatal=False)
        if not player_config:
            raise ExtractorError('No video found', expected=True)

        asset_details = self._download_json(
            f'https://wcps.t-online.de/cvss/magentamusic/vodclient/v2/assetdetails/58938/{player_config["assetId"]}',
            display_id, note='Downloading asset details')

        # First string 'reference' among the partner information entries
        video_id = traverse_obj(
            asset_details, ('content', 'partnerInformation', ..., 'reference', {str}), get_all=False)
        if not video_id:
            raise ExtractorError('Unable to extract video id')

        vod_data = self._download_json(
            f'https://wcps.t-online.de/cvss/magentamusic/vodclient/v2/player/58935/{video_id}/Main%20Movie', video_id)
        # First valid media href over all representations and content packages
        smil_url = traverse_obj(
            vod_data, ('content', 'feature', 'representations', ...,
                       'contentPackages', ..., 'media', 'href', {url_or_none}), get_all=False)

        def join_countries(countries):
            return join_nonempty(*countries, delim=', ')

        def wrap_in_list(genre):
            return genre and [genre]

        formats = self._extract_smil_formats(smil_url, video_id)
        metadata = traverse_obj(vod_data, ('content', 'feature', 'metadata', {
            'title': 'title',
            'alt_title': 'originalTitle',
            'description': 'longDescription',
            'duration': ('runtimeInSeconds', {int_or_none}),
            'location': ('countriesOfProduction', {list}, {join_countries}),
            'release_year': ('yearOfProduction', {int_or_none}),
            'categories': ('mainGenre', {str}, {wrap_in_list}),
        }))

        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            **metadata,
        }
|
@ -1,58 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class MagentaMusik360IE(InfoExtractor):
    # The 'id' group is only present for the "<slug>-<digits>" URL form, not for festivals/ URLs
    _VALID_URL = r'https?://(?:www\.)?magenta-musik-360\.de/([a-z0-9-]+-(?P<id>[0-9]+)|festivals/.+)'
    _TESTS = [{
        'url': 'https://www.magenta-musik-360.de/within-temptation-wacken-2019-1-9208205928595185932',
        'md5': '65b6f060b40d90276ec6fb9b992c1216',
        'info_dict': {
            'id': '9208205928595185932',
            'ext': 'm3u8',
            'title': 'WITHIN TEMPTATION',
            'description': 'Robert Westerholt und Sharon Janny den Adel gründeten die Symphonic Metal-Band. Privat sind die Niederländer ein Paar und haben zwei Kinder. Die Single Ice Queen brachte ihnen Platin und Gold und verhalf 2002 zum internationalen Durchbruch. Charakteristisch für die Band war Anfangs der hohe Gesang von Frontfrau Sharon. Stilistisch fing die Band im Gothic Metal an. Mit neuem Sound, schnellen Gitarrenriffs und Gitarrensoli, avancierte Within Temptation zur erfolgreichen Rockband. Auch dieses Jahr wird die Band ihre Fangemeinde wieder mitreißen.',
        }
    }, {
        'url': 'https://www.magenta-musik-360.de/festivals/wacken-world-wide-2020-body-count-feat-ice-t',
        'md5': '81010d27d7cab3f7da0b0f681b983b7e',
        'info_dict': {
            'id': '9208205928595231363',
            'ext': 'm3u8',
            'title': 'Body Count feat. Ice-T',
            'description': 'Body Count feat. Ice-T konnten bereits im vergangenen Jahr auf dem „Holy Ground“ in Wacken überzeugen. 2020 gehen die Crossover-Metaller aus einem Club in Los Angeles auf Sendung und bringen mit ihrer Mischung aus Metal und Hip-Hop Abwechslung und ordentlich Alarm zum WWW. Bereits seit 1990 stehen die beiden Gründer Ice-T (Gesang) und Ernie C (Gitarre) auf der Bühne. Sieben Studioalben hat die Gruppe bis jetzt veröffentlicht, darunter das Debüt „Body Count“ (1992) mit dem kontroversen Track „Cop Killer“.',
        }
    }]

    def _real_extract(self, url):
        """Resolve the asset id, query the player API and return the stream URL with metadata."""
        video_id = self._match_id(url)
        # _match_id casts to string, but since "None" is not a valid video_id for magenta
        # there is no risk for confusion
        if video_id == "None":
            # No id in the URL: read the asset id from the page markup instead
            webpage = self._download_webpage(url, video_id)
            video_id = self._html_search_regex(r'data-asset-id="([^"]+)"', webpage, 'video_id')

        player_data = self._download_json("https://wcps.t-online.de/cvss/magentamusic/vodplayer/v3/player/58935/%s/Main%%20Movie" % video_id, video_id)
        feature = player_data['content']['feature']
        xml_url = feature['representations'][0]['contentPackages'][0]['media']['href']

        # A missing or empty metadata object leaves every optional field unset
        metadata = feature.get('metadata') or {}
        title = metadata.get('title')
        description = metadata.get('fullDescription')
        duration = metadata.get('runtimeInSeconds')
        thumbnails = [
            {'url': metadata[img_key].get('href')}
            for img_key in ('teaserImageWide', 'smallCoverImage')
            if img_key in metadata
        ]

        # The stream URL is the 'src' attribute of the first element three levels into the XML
        smil_doc = self._download_xml(xml_url, video_id)
        final_url = smil_doc[0][0][0].attrib['src']

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'url': final_url,
            'duration': duration,
            'thumbnails': thumbnails
        }
|
@ -8,7 +8,8 @@ from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@ -16,7 +17,7 @@ class MedalTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?medal\.tv/games/[^/?#&]+/clips/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://medal.tv/games/valorant/clips/jTBFnLKdLy15K',
|
||||
'md5': '6930f8972914b6b9fdc2bb3918098ba0',
|
||||
'md5': '03e4911fdcf7fce563090705c2e79267',
|
||||
'info_dict': {
|
||||
'id': 'jTBFnLKdLy15K',
|
||||
'ext': 'mp4',
|
||||
@ -33,8 +34,8 @@ class MedalTVIE(InfoExtractor):
|
||||
'duration': 13,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://medal.tv/games/cod%20cold%20war/clips/2mA60jWAGQCBH',
|
||||
'md5': '3d19d426fe0b2d91c26e412684e66a06',
|
||||
'url': 'https://medal.tv/games/cod-cold-war/clips/2mA60jWAGQCBH',
|
||||
'md5': 'fc7a3e4552ae8993c1c4006db46be447',
|
||||
'info_dict': {
|
||||
'id': '2mA60jWAGQCBH',
|
||||
'ext': 'mp4',
|
||||
@ -52,7 +53,7 @@ class MedalTVIE(InfoExtractor):
|
||||
'duration': 23,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://medal.tv/games/cod%20cold%20war/clips/2um24TWdty0NA',
|
||||
'url': 'https://medal.tv/games/cod-cold-war/clips/2um24TWdty0NA',
|
||||
'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
|
||||
'info_dict': {
|
||||
'id': '2um24TWdty0NA',
|
||||
@ -81,7 +82,7 @@ class MedalTVIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(update_url_query(url, {'mobilebypass': 'true'}), video_id)
|
||||
|
||||
hydration_data = self._search_json(
|
||||
r'<script[^>]*>[^<]*\bhydrationData\s*=', webpage,
|
||||
|
@ -355,11 +355,11 @@ class MLBArticleIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '36db7394-343c-4ea3-b8ca-ead2e61bca9a',
|
||||
'title': 'Machado\'s grab draws hilarious irate reaction',
|
||||
'modified_timestamp': 1650130737,
|
||||
'modified_timestamp': 1675888370,
|
||||
'description': 'md5:a19d4eb0487b2cb304e9a176f6b67676',
|
||||
'modified_date': '20220416',
|
||||
'modified_date': '20230208',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist_mincount': 2,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -367,15 +367,13 @@ class MLBArticleIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
apollo_cache_json = self._search_json(r'window\.initState\s*=', webpage, 'window.initState', display_id)['apolloCache']
|
||||
|
||||
content_data_id = traverse_obj(
|
||||
apollo_cache_json, ('ROOT_QUERY', lambda k, _: k.startswith('getForgeContent'), 'id'), get_all=False)
|
||||
|
||||
content_real_info = apollo_cache_json[content_data_id]
|
||||
content_real_info = traverse_obj(
|
||||
apollo_cache_json, ('ROOT_QUERY', lambda k, _: k.startswith('getArticle')), get_all=False)
|
||||
|
||||
return self.playlist_from_matches(
|
||||
traverse_obj(content_real_info, ('parts', lambda _, v: v['typename'] == 'Video', 'id')),
|
||||
getter=lambda x: f'https://www.mlb.com/video/{apollo_cache_json[x]["slug"]}',
|
||||
ie=MLBVideoIE, playlist_id=content_real_info.get('_translationId'),
|
||||
traverse_obj(content_real_info, ('parts', lambda _, v: v['__typename'] == 'Video' or v['type'] == 'video')),
|
||||
getter=lambda x: f'https://www.mlb.com/video/{x["slug"]}',
|
||||
ie=MLBVideoIE, playlist_id=content_real_info.get('translationId'),
|
||||
title=self._html_search_meta('og:title', webpage),
|
||||
description=content_real_info.get('summary'),
|
||||
modified_timestamp=parse_iso8601(content_real_info.get('lastUpdatedDate')))
|
||||
|
@ -177,6 +177,7 @@ class MotherlessIE(InfoExtractor):
|
||||
|
||||
|
||||
class MotherlessPaginatedIE(InfoExtractor):
|
||||
_EXTRA_QUERY = {}
|
||||
_PAGE_SIZE = 60
|
||||
|
||||
def _correct_path(self, url, item_id):
|
||||
@ -199,7 +200,7 @@ class MotherlessPaginatedIE(InfoExtractor):
|
||||
def get_page(idx):
|
||||
page = idx + 1
|
||||
current_page = webpage if not idx else self._download_webpage(
|
||||
real_url, item_id, note=f'Downloading page {page}', query={'page': page})
|
||||
real_url, item_id, note=f'Downloading page {page}', query={'page': page, **self._EXTRA_QUERY})
|
||||
yield from self._extract_entries(current_page, real_url)
|
||||
|
||||
return self.playlist_result(
|
||||
@ -213,7 +214,7 @@ class MotherlessGroupIE(MotherlessPaginatedIE):
|
||||
'url': 'http://motherless.com/gv/movie_scenes',
|
||||
'info_dict': {
|
||||
'id': 'movie_scenes',
|
||||
'title': 'Movie Scenes',
|
||||
'title': 'Movie Scenes - Videos - Hot and sexy scenes from "regular" movies... Beautiful actresses fully',
|
||||
},
|
||||
'playlist_mincount': 540,
|
||||
}, {
|
||||
@ -244,7 +245,7 @@ class MotherlessGalleryIE(MotherlessPaginatedIE):
|
||||
'id': '338999F',
|
||||
'title': 'Random',
|
||||
},
|
||||
'playlist_mincount': 190,
|
||||
'playlist_mincount': 171,
|
||||
}, {
|
||||
'url': 'https://motherless.com/GVABD6213',
|
||||
'info_dict': {
|
||||
@ -270,3 +271,27 @@ class MotherlessGalleryIE(MotherlessPaginatedIE):
|
||||
|
||||
def _correct_path(self, url, item_id):
|
||||
return urllib.parse.urljoin(url, f'/GV{item_id}')
|
||||
|
||||
|
||||
class MotherlessUploaderIE(MotherlessPaginatedIE):
    # Matches /u/<name>, optionally followed by a slash, query string or fragment
    _VALID_URL = r'https?://(?:www\.)?motherless\.com/u/(?P<id>\w+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://motherless.com/u/Mrgo4hrs2023',
        'info_dict': {
            'id': 'Mrgo4hrs2023',
            'title': "Mrgo4hrs2023's Uploads - Videos",
        },
        'playlist_mincount': 32,
    }, {
        'url': 'https://motherless.com/u/Happy_couple?t=v',
        'info_dict': {
            'id': 'Happy_couple',
            'title': "Happy_couple's Uploads - Videos",
        },
        'playlist_mincount': 8,
    }]

    # Same 't=v' parameter that _correct_path() puts on the uploader URL
    _EXTRA_QUERY = {'t': 'v'}

    def _correct_path(self, url, item_id):
        """Return the uploader's listing URL (``/u/<item_id>?t=v``) on the host of *url*."""
        uploader_path = f'/u/{item_id}?t=v'
        return urllib.parse.urljoin(url, uploader_path)
|
||||
|
171
yt_dlp/extractor/mx3.py
Normal file
171
yt_dlp/extractor/mx3.py
Normal file
@ -0,0 +1,171 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
try_call,
|
||||
url_or_none,
|
||||
urlhandle_detect_ext,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class Mx3BaseIE(InfoExtractor):
    # Subclasses fill in their escaped domain and must define _DOMAIN
    _VALID_URL_TMPL = r'https?://(?:www\.)?%s/t/(?P<id>\w+)'
    # Candidate formats; 'url' is the path suffix appended to the track's base URL
    _FORMATS = [{
        'url': 'player_asset',
        'format_id': 'default',
        'quality': 0,
    }, {
        'url': 'player_asset?quality=hd',
        'format_id': 'hd',
        'quality': 1,
    }, {
        'url': 'download',
        'format_id': 'download',
        'quality': 2,
    }, {
        'url': 'player_asset?quality=source',
        'format_id': 'source',
        'quality': 2,
    }]

    def _extract_formats(self, track_id):
        """Probe each entry of _FORMATS with a HEAD request and keep those answering 200."""
        available = []
        for template in self._FORMATS:
            format_url = f'https://{self._DOMAIN}/tracks/{track_id}/{template["url"]}'
            # 404 is an expected answer for a format the track does not offer
            handle = self._request_webpage(
                HEADRequest(format_url), track_id, fatal=False, expected_status=404,
                note=f'Checking for format {template["format_id"]}')
            if not handle or handle.status != 200:
                continue
            available.append({
                **template,
                'url': format_url,
                'ext': urlhandle_detect_ext(handle),
                'filesize': int_or_none(handle.headers.get('Content-Length')),
            })
        return available

    def _real_extract(self, url):
        """Combine fields scraped from the track page with the track's JSON document."""
        track_id = self._match_id(url)
        webpage = self._download_webpage(url, track_id)
        more_info = get_element_by_class('single-more-info', webpage)
        data = self._download_json(f'https://{self._DOMAIN}/t/{track_id}.json', track_id, fatal=False)

        def get_info_field(name):
            # Value of the <dd> that follows the <dt> labelled *name* inside more_info
            return self._html_search_regex(
                rf'<dt[^>]*>\s*{name}\s*</dt>\s*<dd[^>]*>(.*?)</dd>',
                more_info, name, default=None, flags=re.DOTALL)

        def split_tags():
            return get_info_field('Tag').split(', ')

        formats = self._extract_formats(track_id)
        genre = self._html_search_regex(
            r'<div\b[^>]+class="single-band-genre"[^>]*>([^<]+)</div>', webpage, 'genre', default=None)
        release_year = int_or_none(get_info_field('Year of creation'))
        description = get_info_field('Description')
        # `list` is the second candidate passed to try_call, after split_tags
        tags = try_call(split_tags, list)
        json_fields = traverse_obj(data, {
            'title': ('title', {str}),
            'artist': (('performer_name', 'artist'), {str}),
            'album_artist': ('artist', {str}),
            'composer': ('composer_name', {str}),
            'thumbnail': (('picture_url_xlarge', 'picture_url'), {url_or_none}),
        }, get_all=False)

        return {
            'id': track_id,
            'formats': formats,
            'genre': genre,
            'release_year': release_year,
            'description': description,
            'tags': tags,
            **json_fields,
        }
|
||||
|
||||
|
||||
class Mx3IE(Mx3BaseIE):
    # Tracks hosted on the main mx3.ch domain; extraction logic lives in Mx3BaseIE
    _DOMAIN = 'mx3.ch'
    _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)
    _TESTS = [
        {
            'url': 'https://mx3.ch/t/1Cru',
            'md5': '7ba09e9826b4447d4e1ce9d69e0e295f',
            'info_dict': {
                'id': '1Cru',
                'ext': 'wav',
                'artist': 'Godina',
                'album_artist': 'Tortue Tortue',
                'composer': 'Olivier Godinat',
                'genre': 'Rock',
                'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/4643/square_xlarge/1-s-envoler-1.jpg?1630272813',
                'title': "S'envoler",
                'release_year': 2021,
                'tags': [],
            }
        },
        {
            'url': 'https://mx3.ch/t/1LIY',
            'md5': '48293cb908342547827f963a5a2e9118',
            'info_dict': {
                'id': '1LIY',
                'ext': 'mov',
                'artist': 'Tania Kimfumu',
                'album_artist': 'The Broots',
                'composer': 'Emmanuel Diserens',
                'genre': 'Electro',
                'thumbnail': 'https://mx3.ch/pictures/mx3/file/0110/0003/video_xlarge/frame_0000.png?1686963670',
                'title': 'The Broots-Larytta remix "Begging For Help"',
                'release_year': 2023,
                'tags': ['the broots', 'cassata records', 'larytta'],
                'description': '"Begging for Help" Larytta Remix Official Video\nRealized By Kali Donkilie in 2023',
            }
        },
        {
            'url': 'https://mx3.ch/t/1C6E',
            'md5': '1afcd578493ddb8e5008e94bb6d97e25',
            'info_dict': {
                'id': '1C6E',
                'ext': 'wav',
                'artist': 'Alien Bubblegum',
                'album_artist': 'Alien Bubblegum',
                'composer': 'Alien Bubblegum',
                'genre': 'Punk',
                'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/1551/square_xlarge/pandora-s-box-cover-with-title.png?1627054733',
                'title': 'Wide Awake',
                'release_year': 2021,
                'tags': ['alien bubblegum', 'bubblegum', 'alien', 'pop punk', 'poppunk'],
            }
        },
    ]
|
||||
|
||||
|
||||
class Mx3NeoIE(Mx3BaseIE):
    # Tracks hosted on the neo.mx3.ch subdomain; extraction logic lives in Mx3BaseIE
    _DOMAIN = 'neo.mx3.ch'
    _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)
    _TESTS = [
        {
            'url': 'https://neo.mx3.ch/t/1hpd',
            'md5': '6d9986bbae5cac3296ec8813bf965eb2',
            'info_dict': {
                'id': '1hpd',
                'ext': 'wav',
                'artist': 'Baptiste Lopez',
                'album_artist': 'Kammerorchester Basel',
                'composer': 'Jannik Giger',
                'genre': 'Composition, Orchestra',
                'title': 'Troisième œil. Für Kammerorchester (2023)',
                'thumbnail': 'https://neo.mx3.ch/pictures/neo/file/0000/0241/square_xlarge/kammerorchester-basel-group-photo-2_c_-lukasz-rajchert.jpg?1560341252',
                'release_year': 2023,
                'tags': [],
            }
        },
    ]
|
||||
|
||||
|
||||
class Mx3VolksmusikIE(Mx3BaseIE):
    # Tracks hosted on the volksmusik.mx3.ch subdomain; extraction logic lives in Mx3BaseIE
    _DOMAIN = 'volksmusik.mx3.ch'
    _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)
    _TESTS = [
        {
            'url': 'https://volksmusik.mx3.ch/t/Zx',
            'md5': 'dd967a7b0c1ef898f3e072cf9c2eae3c',
            'info_dict': {
                'id': 'Zx',
                'ext': 'mp3',
                'artist': 'Ländlerkapelle GrischArt',
                'album_artist': 'Ländlerkapelle GrischArt',
                'composer': 'Urs Glauser',
                'genre': 'Instrumental, Graubünden',
                'title': 'Chämilouf',
                'thumbnail': 'https://volksmusik.mx3.ch/pictures/vxm/file/0000/3815/square_xlarge/grischart1.jpg?1450530120',
                'release_year': 2012,
                'tags': [],
            }
        },
    ]
|
@ -1,20 +1,25 @@
|
||||
import base64
|
||||
import hashlib
|
||||
import hmac
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
import time
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -110,6 +115,18 @@ class NaverBaseIE(InfoExtractor):
|
||||
**self.process_subtitles(video_data, get_subs),
|
||||
}
|
||||
|
||||
def _call_api(self, path, video_id):
    """Call the now_web_api endpoint at *path* with a signed query and return its 'result'.

    The request carries two query parameters: ``msgpad`` (current time in
    milliseconds) and ``md`` (base64 of the HMAC-SHA1 over the first 255
    characters of the endpoint URL followed by ``msgpad``).
    """
    api_endpoint = f'https://apis.naver.com/now_web2/now_web_api/v1{path}'
    key = b'nbxvs5nwNG9QKEWK0ADjYA4JZoujF4gHcIwvoCxFTPAeamq5eemvt5IWAYXxrbYM'
    msgpad = int(time.time() * 1000)

    # Only the first 255 characters of the URL take part in the signature
    signed_message = f'{api_endpoint[:255]}{msgpad}'.encode()
    digest = hmac.new(key, signed_message, hashlib.sha1).digest()
    md = base64.b64encode(digest).decode()

    response = self._download_json(
        api_endpoint, video_id=video_id, headers=self.geo_verification_headers(),
        query={'msgpad': msgpad, 'md': md})
    return response['result']
|
||||
|
||||
|
||||
class NaverIE(NaverBaseIE):
|
||||
_VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/(?:v|embed)/(?P<id>\d+)'
|
||||
@ -125,21 +142,32 @@ class NaverIE(NaverBaseIE):
|
||||
'upload_date': '20130903',
|
||||
'uploader': '메가스터디, 합격불변의 법칙',
|
||||
'uploader_id': 'megastudy',
|
||||
'uploader_url': 'https://tv.naver.com/megastudy',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'duration': 2118,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://tv.naver.com/v/395837',
|
||||
'md5': '8a38e35354d26a17f73f4e90094febd3',
|
||||
'md5': '7791205fa89dbed2f5e3eb16d287ff05',
|
||||
'info_dict': {
|
||||
'id': '395837',
|
||||
'ext': 'mp4',
|
||||
'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
|
||||
'description': 'md5:eb6aca9d457b922e43860a2a2b1984d3',
|
||||
'description': 'md5:c76be23e21403a6473d8119678cdb5cb',
|
||||
'timestamp': 1432030253,
|
||||
'upload_date': '20150519',
|
||||
'uploader': '4가지쇼 시즌2',
|
||||
'uploader_id': 'wrappinguser29',
|
||||
'uploader': '4가지쇼',
|
||||
'uploader_id': '4show',
|
||||
'uploader_url': 'https://tv.naver.com/4show',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'duration': 277,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
},
|
||||
'skip': 'Georestricted',
|
||||
}, {
|
||||
'url': 'http://tvcast.naver.com/v/81652',
|
||||
'only_matching': True,
|
||||
@ -147,56 +175,63 @@ class NaverIE(NaverBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
content = self._download_json(
|
||||
'https://tv.naver.com/api/json/v/' + video_id,
|
||||
video_id, headers=self.geo_verification_headers())
|
||||
player_info_json = content.get('playerInfoJson') or {}
|
||||
current_clip = player_info_json.get('currentClip') or {}
|
||||
data = self._call_api(f'/clips/{video_id}/play-info', video_id)
|
||||
|
||||
vid = current_clip.get('videoId')
|
||||
in_key = current_clip.get('inKey')
|
||||
vid = traverse_obj(data, ('clip', 'videoId', {str}))
|
||||
in_key = traverse_obj(data, ('play', 'inKey', {str}))
|
||||
|
||||
if not vid or not in_key:
|
||||
player_auth = try_get(player_info_json, lambda x: x['playerOption']['auth'])
|
||||
if player_auth == 'notCountry':
|
||||
self.raise_geo_restricted(countries=['KR'])
|
||||
elif player_auth == 'notLogin':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError('couldn\'t extract vid and key')
|
||||
raise ExtractorError('Unable to extract video info')
|
||||
|
||||
info = self._extract_video_info(video_id, vid, in_key)
|
||||
info.update({
|
||||
'description': clean_html(current_clip.get('description')),
|
||||
'timestamp': int_or_none(current_clip.get('firstExposureTime'), 1000),
|
||||
'duration': parse_duration(current_clip.get('displayPlayTime')),
|
||||
'like_count': int_or_none(current_clip.get('recommendPoint')),
|
||||
'age_limit': 19 if current_clip.get('adult') else None,
|
||||
})
|
||||
info.update(traverse_obj(data, ('clip', {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'timestamp': ('firstExposureDatetime', {parse_iso8601}),
|
||||
'duration': ('playTime', {int_or_none}),
|
||||
'like_count': ('likeItCount', {int_or_none}),
|
||||
'view_count': ('playCount', {int_or_none}),
|
||||
'comment_count': ('commentCount', {int_or_none}),
|
||||
'thumbnail': ('thumbnailImageUrl', {url_or_none}),
|
||||
'uploader': 'channelName',
|
||||
'uploader_id': 'channelId',
|
||||
'uploader_url': ('channelUrl', {url_or_none}),
|
||||
'age_limit': ('adultVideo', {lambda x: 19 if x else None}),
|
||||
})))
|
||||
return info
|
||||
|
||||
|
||||
class NaverLiveIE(InfoExtractor):
|
||||
class NaverLiveIE(NaverBaseIE):
|
||||
IE_NAME = 'Naver:live'
|
||||
_VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/l/(?P<id>\d+)'
|
||||
_GEO_BYPASS = False
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.naver.com/l/52010',
|
||||
'url': 'https://tv.naver.com/l/127062',
|
||||
'info_dict': {
|
||||
'id': '52010',
|
||||
'id': '127062',
|
||||
'ext': 'mp4',
|
||||
'title': '[LIVE] 뉴스특보 : "수도권 거리두기, 2주간 2단계로 조정"',
|
||||
'description': 'md5:df7f0c237a5ed5e786ce5c91efbeaab3',
|
||||
'channel_id': 'NTV-ytnnews24-0',
|
||||
'start_time': 1597026780000,
|
||||
'live_status': 'is_live',
|
||||
'channel': '뉴스는 YTN',
|
||||
'channel_id': 'ytnnews24',
|
||||
'title': 're:^대한민국 24시간 뉴스 채널 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': 'md5:f938b5956711beab6f882314ffadf4d5',
|
||||
'start_time': 1677752280,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.naver.com/l/51549',
|
||||
'url': 'https://tv.naver.com/l/140535',
|
||||
'info_dict': {
|
||||
'id': '51549',
|
||||
'id': '140535',
|
||||
'ext': 'mp4',
|
||||
'title': '연합뉴스TV - 코로나19 뉴스특보',
|
||||
'description': 'md5:c655e82091bc21e413f549c0eaccc481',
|
||||
'channel_id': 'NTV-yonhapnewstv-0',
|
||||
'start_time': 1596406380000,
|
||||
'live_status': 'is_live',
|
||||
'channel': 'KBS뉴스',
|
||||
'channel_id': 'kbsnews',
|
||||
'start_time': 1696867320,
|
||||
'title': 're:^언제 어디서나! KBS 뉴스 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': 'md5:6ad419c0bf2f332829bda3f79c295284',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.naver.com/l/54887',
|
||||
@ -205,55 +240,27 @@ class NaverLiveIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
page = self._download_webpage(url, video_id, 'Downloading Page', 'Unable to download Page')
|
||||
secure_url = self._search_regex(r'sApiF:\s+(?:"|\')([^"\']+)', page, 'secureurl')
|
||||
|
||||
info = self._extract_video_info(video_id, secure_url)
|
||||
info.update({
|
||||
'description': self._og_search_description(page)
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
def _extract_video_info(self, video_id, url):
|
||||
video_data = self._download_json(url, video_id, headers=self.geo_verification_headers())
|
||||
meta = video_data.get('meta')
|
||||
status = meta.get('status')
|
||||
data = self._call_api(f'/live-end/normal/{video_id}/play-info?renewLastPlayDate=true', video_id)
|
||||
|
||||
status = traverse_obj(data, ('live', 'liveStatus'))
|
||||
if status == 'CLOSED':
|
||||
raise ExtractorError('Stream is offline.', expected=True)
|
||||
elif status != 'OPENED':
|
||||
raise ExtractorError('Unknown status %s' % status)
|
||||
|
||||
title = meta.get('title')
|
||||
stream_list = video_data.get('streams')
|
||||
|
||||
if stream_list is None:
|
||||
raise ExtractorError('Could not get stream data.', expected=True)
|
||||
|
||||
formats = []
|
||||
for quality in stream_list:
|
||||
if not quality.get('url'):
|
||||
continue
|
||||
|
||||
prop = quality.get('property')
|
||||
if prop.get('abr'): # This abr doesn't mean Average audio bitrate.
|
||||
continue
|
||||
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
quality.get('url'), video_id, 'mp4',
|
||||
m3u8_id=quality.get('qualityId'), live=True
|
||||
))
|
||||
raise ExtractorError(f'Unknown status {status!r}')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'channel_id': meta.get('channelId'),
|
||||
'channel_url': meta.get('channelUrl'),
|
||||
'thumbnail': meta.get('imgUrl'),
|
||||
'start_time': meta.get('startTime'),
|
||||
'categories': [meta.get('categoryId')],
|
||||
'formats': self._extract_m3u8_formats(
|
||||
traverse_obj(data, ('playbackBody', {json.loads}, 'media', 0, 'path')), video_id, live=True),
|
||||
**traverse_obj(data, ('live', {
|
||||
'title': 'title',
|
||||
'channel': 'channelName',
|
||||
'channel_id': 'channelId',
|
||||
'description': 'description',
|
||||
'like_count': (('likeCount', 'likeItCount'), {int_or_none}),
|
||||
'thumbnail': ('thumbnailImageUrl', {url_or_none}),
|
||||
'start_time': (('startTime', 'startDateTime', 'startYmdt'), {parse_iso8601}),
|
||||
}), get_all=False),
|
||||
'is_live': True
|
||||
}
|
||||
|
||||
|
@ -3,15 +3,15 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
parse_count,
|
||||
parse_duration,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
OnDemandPagedList,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@ -263,19 +263,16 @@ class NewgroundsUserIE(InfoExtractor):
|
||||
def _fetch_page(self, channel_id, url, page):
|
||||
page += 1
|
||||
posts_info = self._download_json(
|
||||
f'{url}/page/{page}', channel_id,
|
||||
f'{url}?page={page}', channel_id,
|
||||
note=f'Downloading page {page}', headers={
|
||||
'Accept': 'application/json, text/javascript, */*; q = 0.01',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
})
|
||||
sequence = posts_info.get('sequence', [])
|
||||
for year in sequence:
|
||||
posts = try_get(posts_info, lambda x: x['years'][str(year)]['items'])
|
||||
for post in posts:
|
||||
path, media_id = self._search_regex(
|
||||
r'<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>',
|
||||
post, 'url', group=(1, 2))
|
||||
yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id)
|
||||
for post in traverse_obj(posts_info, ('items', ..., ..., {str})):
|
||||
path, media_id = self._search_regex(
|
||||
r'<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>',
|
||||
post, 'url', group=(1, 2))
|
||||
yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
|
@ -1,10 +1,54 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
merge_dicts,
|
||||
parse_count,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NFBIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nfb\.ca/film/(?P<id>[^/?#&]+)'
|
||||
class NFBBaseIE(InfoExtractor):
    # Common URL prefix; the 'site' group is either 'nfb' or 'onf'
    _VALID_URL_BASE = r'https?://(?:www\.)?(?P<site>nfb|onf)\.ca'
    _GEO_COUNTRIES = ['CA']

    def _extract_ep_data(self, webpage, video_id, fatal=False):
        """Return the ``episodesData`` JSON array embedded in *webpage*.

        An empty list is returned when the array cannot be found and
        *fatal* is false.
        """
        return self._search_json(
            r'const\s+episodesData\s*=', webpage, 'episode data', video_id,
            contains_pattern=r'\[\s*{(?s:.+)}\s*\]', fatal=fatal) or []

    def _extract_ep_info(self, data, video_id, slug=None):
        """Build an info dict for the episode in *data* whose embed_url contains *video_id*.

        @param data      episode entries, as returned by _extract_ep_data()
        @param video_id  identifier searched for in each entry's 'embed_url'
        @param slug      optional string to parse the episode number from
                         (*video_id* is used when it is not given)
        @returns         dict that always holds 'id', 'title' and 'episode_number',
                         plus whatever metadata the matching entry provides
        """
        # traverse_obj yields None when no entry matches (e.g. `data` is empty);
        # fall back to an empty dict so the ** unpacking below cannot raise TypeError
        info = traverse_obj(data, (lambda _, v: video_id in v['embed_url'], {
            'description': ('description', {str}),
            'thumbnail': ('thumbnail_url', {url_or_none}),
            'uploader': ('data_layer', 'episodeMaker', {str}),
            'release_year': ('data_layer', 'episodeYear', {int_or_none}),
            'episode': ('data_layer', 'episodeTitle', {str}),
            'season': ('data_layer', 'seasonTitle', {str}),
            'season_number': ('data_layer', 'seasonTitle', {parse_count}),
            'series': ('data_layer', 'seriesTitle', {str}),
        }), get_all=False) or {}

        return {
            **info,
            'id': video_id,
            'title': join_nonempty('series', 'episode', from_dict=info, delim=' - '),
            # Matches e.g. "-e9", "/episode9" or "-episode-9" followed by "/", "-" or the end
            'episode_number': int_or_none(self._search_regex(
                r'[/-]e(?:pisode)?-?(\d+)(?:[/-]|$)', slug or video_id, 'episode number', default=None)),
        }
|
||||
|
||||
|
||||
class NFBIE(NFBBaseIE):
|
||||
IE_NAME = 'nfb'
|
||||
IE_DESC = 'nfb.ca and onf.ca films and episodes'
|
||||
_VALID_URL = [
|
||||
rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>film)/(?P<id>[^/?#&]+)',
|
||||
rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>series?)/(?P<id>[^/?#&]+/s(?:ea|ai)son\d+/episode\d+)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'note': 'NFB film',
|
||||
'url': 'https://www.nfb.ca/film/trafficopter/',
|
||||
'info_dict': {
|
||||
'id': 'trafficopter',
|
||||
@ -14,29 +58,192 @@ class NFBIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Barrie Howells',
|
||||
'release_year': 1972,
|
||||
'duration': 600.0,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'ONF film',
|
||||
'url': 'https://www.onf.ca/film/mal-du-siecle/',
|
||||
'info_dict': {
|
||||
'id': 'mal-du-siecle',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le mal du siècle',
|
||||
'description': 'md5:1abf774d77569ebe603419f2d344102b',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Catherine Lepage',
|
||||
'release_year': 2019,
|
||||
'duration': 300.0,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'NFB episode with English title',
|
||||
'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/season1/episode9/',
|
||||
'info_dict': {
|
||||
'id': 'true-north-episode9-true-north-finale-making-it',
|
||||
'ext': 'mp4',
|
||||
'title': 'True North: Inside the Rise of Toronto Basketball - Finale: Making It',
|
||||
'description': 'We catch up with each player in the midst of their journey as they reflect on their road ahead.',
|
||||
'series': 'True North: Inside the Rise of Toronto Basketball',
|
||||
'release_year': 2018,
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Finale: Making It',
|
||||
'episode_number': 9,
|
||||
'uploader': 'Ryan Sidhoo',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'ONF episode with French title',
|
||||
'url': 'https://www.onf.ca/serie/direction-nord-la-montee-du-basketball-a-toronto/saison1/episode9/',
|
||||
'info_dict': {
|
||||
'id': 'direction-nord-episode-9',
|
||||
'ext': 'mp4',
|
||||
'title': 'Direction nord – La montée du basketball à Toronto - Finale : Réussir',
|
||||
'description': 'md5:349a57419b71432b97bf6083d92b029d',
|
||||
'series': 'Direction nord – La montée du basketball à Toronto',
|
||||
'release_year': 2018,
|
||||
'season': 'Saison 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Finale : Réussir',
|
||||
'episode_number': 9,
|
||||
'uploader': 'Ryan Sidhoo',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'NFB episode with French title (needs geo-bypass)',
|
||||
'url': 'https://www.nfb.ca/series/etoile-du-nord/saison1/episode1/',
|
||||
'info_dict': {
|
||||
'id': 'etoile-du-nord-episode-1-lobservation',
|
||||
'ext': 'mp4',
|
||||
'title': 'Étoile du Nord - L\'observation',
|
||||
'description': 'md5:161a4617260dee3de70f509b2c9dd21b',
|
||||
'series': 'Étoile du Nord',
|
||||
'release_year': 2023,
|
||||
'season': 'Saison 1',
|
||||
'season_number': 1,
|
||||
'episode': 'L\'observation',
|
||||
'episode_number': 1,
|
||||
'uploader': 'Patrick Bossé',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'ONF episode with English title (needs geo-bypass)',
|
||||
'url': 'https://www.onf.ca/serie/north-star/season1/episode1/',
|
||||
'info_dict': {
|
||||
'id': 'north-star-episode-1-observation',
|
||||
'ext': 'mp4',
|
||||
'title': 'North Star - Observation',
|
||||
'description': 'md5:c727f370839d8a817392b9e3f23655c7',
|
||||
'series': 'North Star',
|
||||
'release_year': 2023,
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Observation',
|
||||
'episode_number': 1,
|
||||
'uploader': 'Patrick Bossé',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'NFB episode with /film/ URL and English title (needs geo-bypass)',
|
||||
'url': 'https://www.nfb.ca/film/north-star-episode-1-observation/',
|
||||
'info_dict': {
|
||||
'id': 'north-star-episode-1-observation',
|
||||
'ext': 'mp4',
|
||||
'title': 'North Star - Observation',
|
||||
'description': 'md5:c727f370839d8a817392b9e3f23655c7',
|
||||
'series': 'North Star',
|
||||
'release_year': 2023,
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Observation',
|
||||
'episode_number': 1,
|
||||
'uploader': 'Patrick Bossé',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'ONF episode with /film/ URL and French title (needs geo-bypass)',
|
||||
'url': 'https://www.onf.ca/film/etoile-du-nord-episode-1-lobservation/',
|
||||
'info_dict': {
|
||||
'id': 'etoile-du-nord-episode-1-lobservation',
|
||||
'ext': 'mp4',
|
||||
'title': 'Étoile du Nord - L\'observation',
|
||||
'description': 'md5:161a4617260dee3de70f509b2c9dd21b',
|
||||
'series': 'Étoile du Nord',
|
||||
'release_year': 2023,
|
||||
'season': 'Saison 1',
|
||||
'season_number': 1,
|
||||
'episode': 'L\'observation',
|
||||
'episode_number': 1,
|
||||
'uploader': 'Patrick Bossé',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'Season 2 episode w/o episode num in id, extract from json ld',
|
||||
'url': 'https://www.onf.ca/film/liste-des-choses-qui-existent-saison-2-ours',
|
||||
'info_dict': {
|
||||
'id': 'liste-des-choses-qui-existent-saison-2-ours',
|
||||
'ext': 'mp4',
|
||||
'title': 'La liste des choses qui existent - L\'ours en peluche',
|
||||
'description': 'md5:d5e8d8fc5f3a7385a9cf0f509b37e28a',
|
||||
'series': 'La liste des choses qui existent',
|
||||
'release_year': 2022,
|
||||
'season': 'Saison 2',
|
||||
'season_number': 2,
|
||||
'episode': 'L\'ours en peluche',
|
||||
'episode_number': 12,
|
||||
'uploader': 'Francis Papillon',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'NFB film /embed/player/ page',
|
||||
'url': 'https://www.nfb.ca/film/afterlife/embed/player/',
|
||||
'info_dict': {
|
||||
'id': 'afterlife',
|
||||
'ext': 'mp4',
|
||||
'title': 'Afterlife',
|
||||
'description': 'md5:84951394f594f1fb1e62d9c43242fdf5',
|
||||
'release_year': 1978,
|
||||
'duration': 420.0,
|
||||
'uploader': 'Ishu Patel',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
site, type_, slug = self._match_valid_url(url).group('site', 'type', 'id')
|
||||
# Need to construct the URL since we match /embed/player/ URLs as well
|
||||
webpage, urlh = self._download_webpage_handle(f'https://www.{site}.ca/{type_}/{slug}/', slug)
|
||||
# type_ can change from film to serie(s) after redirect; new slug may have episode number
|
||||
type_, slug = self._match_valid_url(urlh.url).group('type', 'id')
|
||||
|
||||
webpage = self._download_webpage('https://www.nfb.ca/film/%s/' % video_id, video_id)
|
||||
embed_url = urljoin(f'https://www.{site}.ca', self._html_search_regex(
|
||||
r'<[^>]+\bid=["\']player-iframe["\'][^>]*\bsrc=["\']([^"\']+)', webpage, 'embed url'))
|
||||
video_id = self._match_id(embed_url) # embed url has unique slug
|
||||
player = self._download_webpage(embed_url, video_id, 'Downloading player page')
|
||||
if 'MESSAGE_GEOBLOCKED' in player:
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
|
||||
iframe = self._html_search_regex(
|
||||
r'<[^>]+\bid=["\']player-iframe["\'][^>]*src=["\']([^"\']+)',
|
||||
webpage, 'iframe', default=None, fatal=True)
|
||||
if iframe.startswith('/'):
|
||||
iframe = f'https://www.nfb.ca{iframe}'
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
self._html_search_regex(r'source:\s*\'([^\']+)', player, 'm3u8 url'),
|
||||
video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
player = self._download_webpage(iframe, video_id)
|
||||
if dv_source := self._html_search_regex(r'dvSource:\s*\'([^\']+)', player, 'dv', default=None):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
dv_source, video_id, 'mp4', m3u8_id='dv', preference=-2, fatal=False)
|
||||
for fmt in fmts:
|
||||
fmt['format_note'] = 'described video'
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
source = self._html_search_regex(
|
||||
r'source:\s*\'([^\']+)',
|
||||
player, 'source', default=None, fatal=True)
|
||||
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(source, video_id, ext='mp4')
|
||||
|
||||
return {
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': self._html_search_regex(
|
||||
r'<[^>]+\bid=["\']titleHeader["\'][^>]*>\s*<h1[^>]*>\s*([^<]+?)\s*</h1>',
|
||||
@ -45,14 +252,49 @@ class NFBIE(InfoExtractor):
|
||||
r'<[^>]+\bid=["\']tabSynopsis["\'][^>]*>\s*<p[^>]*>\s*([^<]+)',
|
||||
webpage, 'description', default=None),
|
||||
'thumbnail': self._html_search_regex(
|
||||
r'poster:\s*\'([^\']+)',
|
||||
player, 'thumbnail', default=None),
|
||||
r'poster:\s*\'([^\']+)', player, 'thumbnail', default=None),
|
||||
'uploader': self._html_search_regex(
|
||||
r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
|
||||
webpage, 'uploader', default=None),
|
||||
r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
|
||||
'release_year': int_or_none(self._html_search_regex(
|
||||
r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)',
|
||||
webpage, 'release_year', default=None)),
|
||||
} if type_ == 'film' else self._extract_ep_info(self._extract_ep_data(webpage, video_id, slug), video_id)
|
||||
|
||||
return merge_dicts({
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
}, info, self._search_json_ld(webpage, video_id, default={}))
|
||||
|
||||
|
||||
class NFBSeriesIE(NFBBaseIE):
    # Playlist extractor: turns a series landing page into one entry per episode.
    IE_NAME = 'nfb:series'
    IE_DESC = 'nfb.ca and onf.ca series'
    _VALID_URL = rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>series?)/(?P<id>[^/?#&]+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/',
        'playlist_mincount': 9,
        'info_dict': {
            'id': 'true-north-inside-the-rise-of-toronto-basketball',
        },
    }, {
        'url': 'https://www.onf.ca/serie/la-liste-des-choses-qui-existent-serie/',
        'playlist_mincount': 26,
        'info_dict': {
            'id': 'la-liste-des-choses-qui-existent-serie',
        },
    }]

    def _entries(self, episodes):
        """Yield a url_result for each episode whose `embed_url` NFBIE accepts."""
        playable = traverse_obj(episodes, lambda _, v: NFBIE.suitable(v['embed_url']))
        for ep in playable:
            match = NFBIE._match_valid_url(ep['embed_url'])
            # Pre-populate the entry with the metadata already present in the
            # episode list, keyed by the id taken from the embed URL.
            ep_info = self._extract_ep_info([ep], match.group('id'))
            yield self.url_result(match.group(0), NFBIE, **ep_info)

    def _real_extract(self, url):
        """Return a playlist of all episodes listed for the series at `url`."""
        site, type_, series_id = self._match_valid_url(url).group('site', 'type', 'id')

        # URLs of type "serie" get "saison" in the episode path; any other
        # type gets "season".
        if type_ == 'serie':
            season_path = 'saison'
        else:
            season_path = 'season'

        # The episode list is read from the page of season 1, episode 1.
        first_episode_url = f'https://www.{site}.ca/{type_}/{series_id}/{season_path}1/episode1'
        webpage = self._download_webpage(first_episode_url, series_id)
        episodes = self._extract_ep_data(webpage, series_id, fatal=True)

        return self.playlist_result(self._entries(episodes), series_id)
|
||||
|
@ -9,6 +9,7 @@ from ..utils import (
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
@ -473,22 +474,21 @@ class NhkRadiruIE(InfoExtractor):
|
||||
IE_DESC = 'NHK らじる (Radiru/Rajiru)'
|
||||
_VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3853544',
|
||||
'skip': 'Episode expired on 2023-04-16',
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3926210',
|
||||
'skip': 'Episode expired on 2024-02-24',
|
||||
'info_dict': {
|
||||
'channel': 'NHK-FM',
|
||||
'uploader': 'NHK-FM',
|
||||
'description': 'md5:94b08bdeadde81a97df4ec882acce3e9',
|
||||
'title': 'ジャズ・トゥナイト シリーズJAZZジャイアンツ 56 ジョニー・ホッジス',
|
||||
'id': '0449_01_3926210',
|
||||
'ext': 'm4a',
|
||||
'id': '0449_01_3853544',
|
||||
'series': 'ジャズ・トゥナイト',
|
||||
'uploader': 'NHK-FM',
|
||||
'channel': 'NHK-FM',
|
||||
'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
|
||||
'timestamp': 1680969600,
|
||||
'title': 'ジャズ・トゥナイト NEWジャズ特集',
|
||||
'upload_date': '20230408',
|
||||
'release_timestamp': 1680962400,
|
||||
'release_date': '20230408',
|
||||
'was_live': True,
|
||||
'release_date': '20240217',
|
||||
'description': 'md5:a456ee8e5e59e6dd2a7d32e62386e811',
|
||||
'timestamp': 1708185600,
|
||||
'release_timestamp': 1708178400,
|
||||
'upload_date': '20240217',
|
||||
},
|
||||
}, {
|
||||
# playlist, airs every weekday so it should _hopefully_ be okay forever
|
||||
@ -519,7 +519,8 @@ class NhkRadiruIE(InfoExtractor):
|
||||
'series': 'らじる文庫 by ラジオ深夜便 ',
|
||||
'release_timestamp': 1481126700,
|
||||
'upload_date': '20211101',
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['Unable to download JSON metadata', 'Failed to get extended description'],
|
||||
}, {
|
||||
# news
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109',
|
||||
@ -539,9 +540,28 @@ class NhkRadiruIE(InfoExtractor):
|
||||
},
|
||||
}]
|
||||
|
||||
_API_URL_TMPL = None
|
||||
|
||||
def _extract_extended_description(self, episode_id, episode):
    """Fetch the long-form description for an episode from the detail API.

    The detail URL is built from `_API_URL_TMPL` using the service and area
    taken from the comma-separated `aa_vinfo2` field and the id in
    `aa_vinfo3`. Returns None when no URL can be built; otherwise returns
    the non-empty `subtitle`, `content`, `act` and `music` fields joined by
    blank lines.
    """
    vinfo2_parts = traverse_obj(episode, ('aa_vinfo2', {str}, {lambda x: (x or '').partition(',')}))
    service, _, area = vinfo2_parts
    date_id = traverse_obj(episode, ('aa_vinfo3', {str}))

    def build_detail_url():
        return self._API_URL_TMPL.format(service=service, area=area, dateid=date_id)

    # try_call swallows the failure when the template is unset or cannot be
    # formatted, leaving detail_url empty.
    detail_url = try_call(build_detail_url)
    if not detail_url:
        return

    response = self._download_json(detail_url, episode_id, 'Downloading extended metadata', fatal=False)
    full_meta = traverse_obj(response, ('list', service, 0, {dict})) or {}
    return join_nonempty('subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta)
|
||||
|
||||
def _extract_episode_info(self, headline, programme_id, series_meta):
|
||||
episode_id = f'{programme_id}_{headline["headline_id"]}'
|
||||
episode = traverse_obj(headline, ('file_list', 0, {dict}))
|
||||
description = self._extract_extended_description(episode_id, episode)
|
||||
if not description:
|
||||
self.report_warning('Failed to get extended description, falling back to summary')
|
||||
description = traverse_obj(episode, ('file_title_sub', {str}))
|
||||
|
||||
return {
|
||||
**series_meta,
|
||||
@ -551,14 +571,21 @@ class NhkRadiruIE(InfoExtractor):
|
||||
'was_live': True,
|
||||
'series': series_meta.get('title'),
|
||||
'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'),
|
||||
'description': description,
|
||||
**traverse_obj(episode, {
|
||||
'title': 'file_title',
|
||||
'description': 'file_title_sub',
|
||||
'timestamp': ('open_time', {unified_timestamp}),
|
||||
'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}),
|
||||
}),
|
||||
}
|
||||
|
||||
def _real_initialize(self):
    """Load the programme-detail URL template from the site config, once.

    The template is stored on the NhkRadiruIE class itself, so later calls
    that find it already set skip the download.
    """
    if not self._API_URL_TMPL:
        config_xml = self._download_xml(
            'https://www.nhk.or.jp/radio/config/config_web.xml', None, 'Downloading API config', fatal=False)

        def read_template():
            return f'https:{config_xml.find(".//url_program_detail").text}'

        # The download is non-fatal; try_call leaves the template as None
        # when the config could not be fetched or lacks the element.
        NhkRadiruIE._API_URL_TMPL = try_call(read_template)
|
||||
|
||||
def _real_extract(self, url):
|
||||
site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
|
||||
programme_id = f'{site_id}_{corner_id}'
|
||||
@ -665,7 +692,7 @@ class NhkRadiruLiveIE(InfoExtractor):
|
||||
|
||||
noa_info = self._download_json(
|
||||
f'https:{config.find(".//url_program_noa").text}'.format(area=data.find('areakey').text),
|
||||
station, note=f'Downloading {area} station metadata')
|
||||
station, note=f'Downloading {area} station metadata', fatal=False)
|
||||
present_info = traverse_obj(noa_info, ('nowonair_list', self._NOA_STATION_IDS.get(station), 'present'))
|
||||
|
||||
return {
|
||||
|
@ -172,9 +172,6 @@ class NiconicoIE(InfoExtractor):
|
||||
|
||||
_VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||
_NETRC_MACHINE = 'niconico'
|
||||
_COMMENT_API_ENDPOINTS = (
|
||||
'https://nvcomment.nicovideo.jp/legacy/api.json',
|
||||
'https://nmsg.nicovideo.jp/api.json',)
|
||||
_API_HEADERS = {
|
||||
'X-Frontend-ID': '6',
|
||||
'X-Frontend-Version': '0',
|
||||
@ -470,93 +467,16 @@ class NiconicoIE(InfoExtractor):
|
||||
parse_duration(self._html_search_meta('video:duration', webpage, 'video duration', default=None))
|
||||
or get_video_info('duration')),
|
||||
'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{video_id}',
|
||||
'subtitles': self.extract_subtitles(video_id, api_data, session_api_data),
|
||||
'subtitles': self.extract_subtitles(video_id, api_data),
|
||||
}
|
||||
|
||||
def _get_subtitles(self, video_id, api_data, session_api_data):
|
||||
comment_user_key = traverse_obj(api_data, ('comment', 'keys', 'userKey'))
|
||||
user_id_str = session_api_data.get('serviceUserId')
|
||||
|
||||
thread_ids = traverse_obj(api_data, ('comment', 'threads', lambda _, v: v['isActive']))
|
||||
legacy_danmaku = self._extract_legacy_comments(video_id, thread_ids, user_id_str, comment_user_key) or []
|
||||
|
||||
new_comments = traverse_obj(api_data, ('comment', 'nvComment'))
|
||||
new_danmaku = self._extract_new_comments(
|
||||
new_comments.get('server'), video_id,
|
||||
new_comments.get('params'), new_comments.get('threadKey'))
|
||||
|
||||
if not legacy_danmaku and not new_danmaku:
|
||||
self.report_warning(f'Failed to get comments. {bug_reports_message()}')
|
||||
return
|
||||
|
||||
return {
|
||||
'comments': [{
|
||||
'ext': 'json',
|
||||
'data': json.dumps(legacy_danmaku + new_danmaku),
|
||||
}],
|
||||
}
|
||||
|
||||
def _extract_legacy_comments(self, video_id, threads, user_id, user_key):
|
||||
auth_data = {
|
||||
'user_id': user_id,
|
||||
'userkey': user_key,
|
||||
} if user_id and user_key else {'user_id': ''}
|
||||
|
||||
api_url = traverse_obj(threads, (..., 'server'), get_all=False)
|
||||
|
||||
# Request Start
|
||||
post_data = [{'ping': {'content': 'rs:0'}}]
|
||||
for i, thread in enumerate(threads):
|
||||
thread_id = thread['id']
|
||||
thread_fork = thread['fork']
|
||||
# Post Start (2N)
|
||||
post_data.append({'ping': {'content': f'ps:{i * 2}'}})
|
||||
post_data.append({'thread': {
|
||||
'fork': thread_fork,
|
||||
'language': 0,
|
||||
'nicoru': 3,
|
||||
'scores': 1,
|
||||
'thread': thread_id,
|
||||
'version': '20090904',
|
||||
'with_global': 1,
|
||||
**auth_data,
|
||||
}})
|
||||
# Post Final (2N)
|
||||
post_data.append({'ping': {'content': f'pf:{i * 2}'}})
|
||||
|
||||
# Post Start (2N+1)
|
||||
post_data.append({'ping': {'content': f'ps:{i * 2 + 1}'}})
|
||||
post_data.append({'thread_leaves': {
|
||||
# format is '<bottom of minute range>-<top of minute range>:<comments per minute>,<total last comments'
|
||||
# unfortunately NND limits (deletes?) comment returns this way, so you're only able to grab the last 1000 per language
|
||||
'content': '0-999999:999999,999999,nicoru:999999',
|
||||
'fork': thread_fork,
|
||||
'language': 0,
|
||||
'nicoru': 3,
|
||||
'scores': 1,
|
||||
'thread': thread_id,
|
||||
**auth_data,
|
||||
}})
|
||||
# Post Final (2N+1)
|
||||
post_data.append({'ping': {'content': f'pf:{i * 2 + 1}'}})
|
||||
# Request Final
|
||||
post_data.append({'ping': {'content': 'rf:0'}})
|
||||
|
||||
return self._download_json(
|
||||
f'{api_url}/api.json', video_id, data=json.dumps(post_data).encode(), fatal=False,
|
||||
headers={
|
||||
'Referer': f'https://www.nicovideo.jp/watch/{video_id}',
|
||||
'Origin': 'https://www.nicovideo.jp',
|
||||
'Content-Type': 'text/plain;charset=UTF-8',
|
||||
},
|
||||
note='Downloading comments', errnote=f'Failed to access endpoint {api_url}')
|
||||
|
||||
def _extract_new_comments(self, endpoint, video_id, params, thread_key):
|
||||
comments = self._download_json(
|
||||
f'{endpoint}/v1/threads', video_id, data=json.dumps({
|
||||
def _get_subtitles(self, video_id, api_data):
|
||||
comments_info = traverse_obj(api_data, ('comment', 'nvComment', {dict})) or {}
|
||||
danmaku = traverse_obj(self._download_json(
|
||||
f'{comments_info.get("server")}/v1/threads', video_id, data=json.dumps({
|
||||
'additionals': {},
|
||||
'params': params,
|
||||
'threadKey': thread_key,
|
||||
'params': comments_info.get('params'),
|
||||
'threadKey': comments_info.get('threadKey'),
|
||||
}).encode(), fatal=False,
|
||||
headers={
|
||||
'Referer': 'https://www.nicovideo.jp/',
|
||||
@ -566,8 +486,19 @@ class NiconicoIE(InfoExtractor):
|
||||
'x-frontend-id': '6',
|
||||
'x-frontend-version': '0',
|
||||
},
|
||||
note='Downloading comments (new)', errnote='Failed to download comments (new)')
|
||||
return traverse_obj(comments, ('data', 'threads', ..., 'comments', ...))
|
||||
note='Downloading comments', errnote='Failed to download comments'),
|
||||
('data', 'threads', ..., 'comments', ...))
|
||||
|
||||
if not danmaku:
|
||||
self.report_warning(f'Failed to get comments. {bug_reports_message()}')
|
||||
return
|
||||
|
||||
return {
|
||||
'comments': [{
|
||||
'ext': 'json',
|
||||
'data': json.dumps(danmaku),
|
||||
}],
|
||||
}
|
||||
|
||||
|
||||
class NiconicoPlaylistBaseIE(InfoExtractor):
|
||||
|
225
yt_dlp/extractor/ninaprotocol.py
Normal file
225
yt_dlp/extractor/ninaprotocol.py
Normal file
@ -0,0 +1,225 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, mimetype2ext, parse_iso8601, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NinaProtocolIE(InfoExtractor):
    # Extracts a Nina Protocol release as a playlist with one entry per audio file.
    _VALID_URL = r'https?://(?:www\.)?ninaprotocol\.com/releases/(?P<id>[^/#?]+)'
    _TESTS = [{
        'url': 'https://www.ninaprotocol.com/releases/3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ',
        'info_dict': {
            'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ',
            'title': 'The Spatulas - March Chant',
            'tags': ['punk', 'postpresentmedium', 'cambridge'],
            'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
            'channel': 'ppm',
            'description': 'md5:bb9f9d39d8f786449cd5d0ff7c5772db',
            'album': 'The Spatulas - March Chant',
            'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
            'timestamp': 1701417610,
            'uploader': 'ppmrecs',
            'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
            'display_id': 'the-spatulas-march-chant',
            'upload_date': '20231201',
            'album_artist': 'Post Present Medium ',
        },
        'playlist': [{
            'info_dict': {
                'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_1',
                'title': 'March Chant In April',
                'track': 'March Chant In April',
                'ext': 'mp3',
                'duration': 152,
                'track_number': 1,
                'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
                'uploader': 'ppmrecs',
                'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
                'timestamp': 1701417610,
                'channel': 'ppm',
                'album': 'The Spatulas - March Chant',
                'tags': ['punk', 'postpresentmedium', 'cambridge'],
                'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
                'upload_date': '20231201',
                'album_artist': 'Post Present Medium ',
            }
        }, {
            'info_dict': {
                'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_2',
                'title': 'Rescue Mission',
                'track': 'Rescue Mission',
                'ext': 'mp3',
                'duration': 212,
                'track_number': 2,
                'album_artist': 'Post Present Medium ',
                'uploader': 'ppmrecs',
                'tags': ['punk', 'postpresentmedium', 'cambridge'],
                'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
                'channel': 'ppm',
                'upload_date': '20231201',
                'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
                'timestamp': 1701417610,
                'album': 'The Spatulas - March Chant',
                'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
            }
        }, {
            'info_dict': {
                'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_3',
                'title': 'Slinger Style',
                'track': 'Slinger Style',
                'ext': 'mp3',
                'duration': 179,
                'track_number': 3,
                'timestamp': 1701417610,
                'upload_date': '20231201',
                'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
                'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
                'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
                'album_artist': 'Post Present Medium ',
                'album': 'The Spatulas - March Chant',
                'tags': ['punk', 'postpresentmedium', 'cambridge'],
                'uploader': 'ppmrecs',
                'channel': 'ppm',
            }
        }, {
            'info_dict': {
                'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_4',
                'title': 'Psychic Signal',
                'track': 'Psychic Signal',
                'ext': 'mp3',
                'duration': 220,
                'track_number': 4,
                'tags': ['punk', 'postpresentmedium', 'cambridge'],
                'upload_date': '20231201',
                'album': 'The Spatulas - March Chant',
                'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
                'timestamp': 1701417610,
                'album_artist': 'Post Present Medium ',
                'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
                'channel': 'ppm',
                'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
                'uploader': 'ppmrecs',
            }
        }, {
            'info_dict': {
                'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_5',
                'title': 'Curvy Color',
                'track': 'Curvy Color',
                'ext': 'mp3',
                'duration': 148,
                'track_number': 5,
                'timestamp': 1701417610,
                'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
                'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
                'album': 'The Spatulas - March Chant',
                'album_artist': 'Post Present Medium ',
                'channel': 'ppm',
                'tags': ['punk', 'postpresentmedium', 'cambridge'],
                'uploader': 'ppmrecs',
                'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
                'upload_date': '20231201',
            }
        }, {
            'info_dict': {
                'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_6',
                'title': 'Caveman Star',
                'track': 'Caveman Star',
                'ext': 'mp3',
                'duration': 121,
                'track_number': 6,
                'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
                'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
                'tags': ['punk', 'postpresentmedium', 'cambridge'],
                'album_artist': 'Post Present Medium ',
                'uploader': 'ppmrecs',
                'timestamp': 1701417610,
                'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
                'album': 'The Spatulas - March Chant',
                'channel': 'ppm',
                'upload_date': '20231201',
            },
        }],
    }, {
        'url': 'https://www.ninaprotocol.com/releases/f-g-s-american-shield',
        'info_dict': {
            'id': '76PZnJwaMgViQHYfA4NYJXds7CmW6vHQKAtQUxGene6J',
            'description': 'md5:63f08d5db558b4b36e1896f317062721',
            'title': 'F.G.S. - American Shield',
            'uploader_id': 'Ej3rozs11wYqFk1Gs6oggGCkGLz8GzBhmJfnUxf6gPci',
            'channel_id': '6JuksCZPXuP16wJ1BUfwuukJzh42C7guhLrFPPkVJfyE',
            'channel': 'tinkscough',
            'tags': [],
            'album_artist': 'F.G.S.',
            'album': 'F.G.S. - American Shield',
            'thumbnail': 'https://www.arweave.net/YJpgImkXLT9SbpFb576KuZ5pm6bdvs452LMs3Rx6lm8',
            'display_id': 'f-g-s-american-shield',
            'uploader': 'flannerysilva',
            'timestamp': 1702395858,
            'upload_date': '20231212',
        },
        'playlist_count': 1,
    }, {
        'url': 'https://www.ninaprotocol.com/releases/time-to-figure-things-out',
        'info_dict': {
            'id': '6Zi1nC5hj6b13NkpxVYwRhFy6mYA7oLBbe9DMrgGDcYh',
            'display_id': 'time-to-figure-things-out',
            'description': 'md5:960202ed01c3134bb8958f1008527e35',
            'timestamp': 1706283607,
            'title': 'DJ STEPDAD - time to figure things out',
            'album_artist': 'DJ STEPDAD',
            'uploader': 'tddvsss',
            'upload_date': '20240126',
            'album': 'time to figure things out',
            'uploader_id': 'AXQNRgTyYsySyAMFDwxzumuGjfmoXshorCesjpquwCBi',
            'thumbnail': 'https://www.arweave.net/O4i8bcKVqJVZvNeHHFp6r8knpFGh9ZwEgbeYacr4nss',
            'tags': [],
        },
        'playlist_count': 4,
    }]

    def _real_extract(self, url):
        """Return a playlist dict for the release at `url`, one entry per file with a valid URI."""
        video_id = self._match_id(url)
        api_response = self._download_json(
            f'https://api.ninaprotocol.com/v1/releases/{video_id}', video_id)
        release = api_response['release']

        # The URL component may be a slug (see the tests above); prefer the
        # release's public key as the id when the API provides one.
        video_id = release.get('publicKey') or video_id

        # Release-level metadata that is copied onto the playlist and every track.
        common_info = traverse_obj(release, {
            'album': ('metadata', 'properties', 'title', {str}),
            'album_artist': ((('hub', 'data'), 'publisherAccount'), 'displayName', {str}),
            'timestamp': ('datetime', {parse_iso8601}),
            'thumbnail': ('metadata', 'image', {url_or_none}),
            'uploader': ('publisherAccount', 'handle', {str}),
            'uploader_id': ('publisherAccount', 'publicKey', {str}),
            'channel': ('hub', 'handle', {str}),
            'channel_id': ('hub', 'publicKey', {str}),
        }, get_all=False)
        common_info['tags'] = traverse_obj(release, ('metadata', 'properties', 'tags', ..., {str}))

        # Only files whose `uri` passes url_or_none become entries; the entry
        # id suffix is the 1-based position among those files.
        playable_files = traverse_obj(release, (
            'metadata', 'properties', 'files', lambda _, v: url_or_none(v['uri'])))
        entries = [{
            'id': f'{video_id}_{track_num}',
            'url': track['uri'],
            **traverse_obj(track, {
                'title': ('track_title', {str}),
                'track': ('track_title', {str}),
                'ext': ('type', {mimetype2ext}),
                'track_number': ('track', {int_or_none}),
                'duration': ('duration', {int_or_none}),
            }),
            'vcodec': 'none',
            **common_info,
        } for track_num, track in enumerate(playable_files, 1)]

        release_info = traverse_obj(release, {
            'display_id': ('slug', {str}),
            'title': ('metadata', 'name', {str}),
            'description': ('metadata', 'description', {str}),
        })

        return {
            '_type': 'playlist',
            'id': video_id,
            'entries': entries,
            **release_info,
            **common_info,
        }
|
72
yt_dlp/extractor/ninenews.py
Normal file
72
yt_dlp/extractor/ninenews.py
Normal file
@ -0,0 +1,72 @@
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from ..utils import ExtractorError
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NineNewsIE(InfoExtractor):
    """Extractor for 9news.com.au pages that delegates playback to Brightcove."""

    IE_NAME = '9News'
    _VALID_URL = r'https?://(?:www\.)?9news\.com\.au/(?:[\w-]+/){2,3}(?P<id>[\w-]+)/?(?:$|[?#])'
    _TESTS = [
        {
            'url': 'https://www.9news.com.au/videos/national/fair-trading-pulls-dozens-of-toys-from-shelves/clqgc7dvj000y0jnvfism0w5m',
            'md5': 'd1a65b2e9d126e5feb9bc5cb96e62c80',
            'info_dict': {
                'id': '6343717246112',
                'ext': 'mp4',
                'title': 'Fair Trading pulls dozens of toys from shelves',
                'description': 'Fair Trading Australia have been forced to pull dozens of toys from shelves over hazard fears.',
                'thumbnail': 'md5:bdbe44294e2323b762d97acf8843f66c',
                'duration': 93.44,
                'timestamp': 1703231748,
                'upload_date': '20231222',
                'uploader_id': '664969388001',
                'tags': ['networkclip', 'aunews_aunationalninenews', 'christmas presents', 'toys', 'fair trading', 'au_news'],
            },
        },
        {
            'url': 'https://www.9news.com.au/world/tape-reveals-donald-trump-pressured-michigan-officials-not-to-certify-2020-vote-a-new-report-says/0b8b880e-7d3c-41b9-b2bd-55bc7e492259',
            'md5': 'a885c44d20898c3e70e9a53e8188cea1',
            'info_dict': {
                'id': '6343587450112',
                'ext': 'mp4',
                'title': 'Trump found ineligible to run for president by state court',
                'description': 'md5:40e6e7db7a4ac6be0e960569a5af6066',
                'thumbnail': 'md5:3e132c48c186039fd06c10787de9bff2',
                'duration': 104.64,
                'timestamp': 1703058034,
                'upload_date': '20231220',
                'uploader_id': '664969388001',
                'tags': ['networkclip', 'aunews_aunationalninenews', 'ineligible', 'presidential candidate', 'donald trump', 'au_news'],
            },
        },
        {
            'url': 'https://www.9news.com.au/national/outrage-as-parents-banned-from-giving-gifts-to-kindergarten-teachers/e19b49d4-a1a4-4533-9089-6e10e2d9386a',
            'info_dict': {
                'id': '6343716797112',
                'ext': 'mp4',
                'title': 'Outrage as parents banned from giving gifts to kindergarten teachers',
                'description': 'md5:7a8b0ed2f9e08875fd9a3e86e462bc46',
                'thumbnail': 'md5:5ee4d66717bdd0dee9fc9a705ef041b8',
                'duration': 91.307,
                'timestamp': 1703229584,
                'upload_date': '20231222',
                'uploader_id': '664969388001',
                'tags': ['networkclip', 'aunews_aunationalninenews', 'presents', 'teachers', 'kindergarten', 'au_news'],
            },
        },
    ]

    def _real_extract(self, url):
        """Resolve the page to a Brightcove player URL.

        Reads the ``__INITIAL_STATE__`` JSON embedded in the page, pulls the
        video ID and the account from it and returns a ``url_result`` that
        points at ``BrightcoveNewIE``. Raises ``ExtractorError`` when either
        value is missing.
        """
        page_id = self._match_id(url)
        state = self._search_json(
            r'var\s+__INITIAL_STATE__\s*=', self._download_webpage(url, page_id),
            'initial state', page_id)

        # Candidate locations of the video ID, tried in order:
        # videoIndex.currentVideo.brightcoveId first, then the 'urn' of an
        # article media entry whose type is 'video'.
        id_paths = (
            ('videoIndex', 'currentVideo', 'brightcoveId', {str}),
            ('article', ..., 'media', lambda _, v: v['type'] == 'video', 'urn', {str}),
        )
        brightcove_id = traverse_obj(state, *id_paths, get_all=False)

        # The account is looked up directly under 'config' and under config.video
        account_id = traverse_obj(
            state, ('videoIndex', 'config', (None, 'video'), 'account', {str}),
            get_all=False)

        if not (brightcove_id and account_id):
            raise ExtractorError('Unable to get the required video data')

        player_url = f'https://players.brightcove.net/{account_id}/default_default/index.html?videoId={brightcove_id}'
        return self.url_result(player_url, BrightcoveNewIE, brightcove_id)
|
@ -135,14 +135,15 @@ class NovaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
|
||||
'md5': '249baab7d0104e186e78b0899c7d5f28',
|
||||
'md5': 'da8f3f1fcdaf9fb0f112a32a165760a3',
|
||||
'info_dict': {
|
||||
'id': '1757139',
|
||||
'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci',
|
||||
'id': '8OvQqEvV3MW',
|
||||
'display_id': '8OvQqEvV3MW',
|
||||
'ext': 'mp4',
|
||||
'title': 'Podzemní nemocnice v pražské Krči',
|
||||
'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg)',
|
||||
'duration': 151,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
|
||||
@ -210,7 +211,7 @@ class NovaIE(InfoExtractor):
|
||||
|
||||
# novaplus
|
||||
embed_id = self._search_regex(
|
||||
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)',
|
||||
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media(?:tn)?\.cms\.nova\.cz/embed/([^/?#&"\']+)',
|
||||
webpage, 'embed url', default=None)
|
||||
if embed_id:
|
||||
return {
|
||||
|
199
yt_dlp/extractor/nuum.py
Normal file
199
yt_dlp/extractor/nuum.py
Normal file
@ -0,0 +1,199 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
UserNotLive,
|
||||
filter_dict,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NuumBaseIE(InfoExtractor):
    """Shared API and metadata helpers for the nuum.ru extractors."""

    def _call_api(self, path, video_id, description, query=None):
        """Fetch ``https://nuum.ru/api/v2/{path}`` and return its ``result`` member.

        ``description`` is only used to build the progress and error notes.
        ``query`` is an optional mapping of query-string parameters; it
        defaults to ``None`` (treated as "no parameters") rather than a
        shared mutable ``{}`` default.

        Raises ``ExtractorError`` when the response carries a truthy ``error``.
        """
        response = self._download_json(
            f'https://nuum.ru/api/v2/{path}', video_id, query=query or {},
            note=f'Downloading {description} metadata',
            errnote=f'Unable to download {description} metadata')
        if error := response.get('error'):
            raise ExtractorError(f'API returned error: {error!r}')
        return response['result']

    def _get_channel_info(self, channel_name):
        """Return the ``broadcasts/public`` API result for ``channel_name``."""
        return self._call_api(
            'broadcasts/public', video_id=channel_name, description='channel',
            query={
                'with_extra': 'true',
                'channel_name': channel_name,
                'with_deleted': 'true',
            })

    def _parse_video_data(self, container, extract_formats=True):
        """Build an info dict from a media container object.

        ``container`` must provide ``media_container_id``; every other field
        is optional and read through ``traverse_obj``. When
        ``extract_formats`` is false, no m3u8 request is made and
        ``formats``/``subtitles`` are passed to ``filter_dict`` as ``None``.
        """
        # Only the first stream and the first media entry of that stream are used
        stream = traverse_obj(container, ('media_container_streams', 0, {dict})) or {}
        media = traverse_obj(stream, ('stream_media', 0, {dict})) or {}
        # 'media_archive_url' is preferred over 'media_url' (first valid URL wins)
        media_url = traverse_obj(media, (
            'media_meta', ('media_archive_url', 'media_url'), {url_or_none}), get_all=False)

        video_id = str(container['media_container_id'])
        is_live = media.get('media_status') == 'RUNNING'

        formats, subtitles = None, None
        if extract_formats:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                media_url, video_id, 'mp4', live=is_live)

        # NOTE(review): filter_dict presumably drops the None-valued entries
        # (e.g. formats/subtitles when extract_formats is false) - confirm.
        return filter_dict({
            'id': video_id,
            'is_live': is_live,
            'formats': formats,
            'subtitles': subtitles,
            **traverse_obj(container, {
                'title': ('media_container_name', {str}),
                'description': ('media_container_description', {str}),
                'timestamp': ('created_at', {parse_iso8601}),
                'channel': ('media_container_channel', 'channel_name', {str}),
                'channel_id': ('media_container_channel', 'channel_id', {str_or_none}),
            }),
            **traverse_obj(stream, {
                'view_count': ('stream_total_viewers', {int_or_none}),
                'concurrent_view_count': ('stream_current_viewers', {int_or_none}),
            }),
            **traverse_obj(media, {
                'duration': ('media_duration', {int_or_none}),
                'thumbnail': ('media_meta', ('media_preview_archive_url', 'media_preview_url'), {url_or_none}),
            }, get_all=False),
        })
|
||||
|
||||
|
||||
class NuumMediaIE(NuumBaseIE):
    """Extractor for individual nuum.ru stream, video and clip pages."""

    IE_NAME = 'nuum:media'
    _VALID_URL = r'https?://nuum\.ru/(?:streams|videos|clips)/(?P<id>[\d]+)'
    _TESTS = [
        {
            'url': 'https://nuum.ru/streams/1592713-7-days-to-die',
            'only_matching': True,
        },
        {
            'url': 'https://nuum.ru/videos/1567547-toxi-hurtz',
            'md5': 'f1d9118a30403e32b702a204eb03aca3',
            'info_dict': {
                'id': '1567547',
                'ext': 'mp4',
                'title': 'Toxi$ - Hurtz',
                'description': '',
                'timestamp': 1702631651,
                'upload_date': '20231215',
                'thumbnail': r're:^https?://.+\.jpg',
                'view_count': int,
                'concurrent_view_count': int,
                'channel_id': '6911',
                'channel': 'toxis',
                'duration': 116,
            },
        },
        {
            'url': 'https://nuum.ru/clips/1552564-pro-misu',
            'md5': 'b248ae1565b1e55433188f11beeb0ca1',
            'info_dict': {
                'id': '1552564',
                'ext': 'mp4',
                'title': 'Про Мису 🙃',
                'timestamp': 1701971828,
                'upload_date': '20231207',
                'thumbnail': r're:^https?://.+\.jpg',
                'view_count': int,
                'concurrent_view_count': int,
                'channel_id': '3320',
                'channel': 'Misalelik',
                'duration': 41,
            },
        },
    ]

    def _real_extract(self, url):
        """Fetch the media container named by the URL's numeric ID and parse it."""
        media_id = self._match_id(url)
        container = self._call_api(f'media-containers/{media_id}', media_id, 'media')
        return self._parse_video_data(container)
|
||||
|
||||
|
||||
class NuumLiveIE(NuumBaseIE):
    """Extractor for the current broadcast of a nuum.ru channel page."""

    IE_NAME = 'nuum:live'
    _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/?(?:$|[#?])'
    _TESTS = [
        {
            'url': 'https://nuum.ru/channel/mts_live',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the info dict of the channel's media container.

        Raises ``UserNotLive`` only when the API explicitly reports
        ``channel_is_live`` as ``False``; a missing flag does not abort.
        The result is attributed to ``NuumMediaIE`` and given the matching
        ``/streams/<id>`` webpage URL.
        """
        channel_name = self._match_id(url)
        broadcast = self._get_channel_info(channel_name)

        live_flag = traverse_obj(broadcast, ('channel', 'channel_is_live'))
        if live_flag is False:
            raise UserNotLive(video_id=channel_name)

        parsed = self._parse_video_data(broadcast['media_container'])

        result = {
            'webpage_url': f'https://nuum.ru/streams/{parsed["id"]}',
            'extractor_key': NuumMediaIE.ie_key(),
            'extractor': NuumMediaIE.IE_NAME,
        }
        # Parsed metadata is merged last, so it wins over the defaults above
        result.update(parsed)
        return result
|
||||
|
||||
|
||||
class NuumTabIE(NuumBaseIE):
    """Extractor for the streams/videos/clips tabs of a nuum.ru channel."""

    IE_NAME = 'nuum:tab'
    _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/(?P<type>streams|videos|clips)'
    _TESTS = [
        {
            'url': 'https://nuum.ru/channel/dankon_/clips',
            'info_dict': {
                'id': 'dankon__clips',
                'title': 'Dankon_',
            },
            'playlist_mincount': 29,
        },
        {
            'url': 'https://nuum.ru/channel/dankon_/videos',
            'info_dict': {
                'id': 'dankon__videos',
                'title': 'Dankon_',
            },
            'playlist_mincount': 2,
        },
        {
            'url': 'https://nuum.ru/channel/dankon_/streams',
            'info_dict': {
                'id': 'dankon__streams',
                'title': 'Dankon_',
            },
            'playlist_mincount': 1,
        },
    ]

    _PAGE_SIZE = 50

    def _fetch_page(self, channel_id, tab_type, tab_id, page):
        """Yield one ``url_result`` per media container on a tab page.

        ``page`` is zero-based; it is converted to an ``offset`` of
        ``page * _PAGE_SIZE`` for the ``media-containers`` API call.
        """
        # Maps the tab name from the URL to the API's container type values
        types_by_tab = {
            'clips': ['SHORT_VIDEO', 'REVIEW_VIDEO'],
            'videos': ['LONG_VIDEO'],
            'streams': ['SINGLE'],
        }
        api_query = {
            'limit': self._PAGE_SIZE,
            'offset': page * self._PAGE_SIZE,
            'channel_id': channel_id,
            'media_container_status': 'STOPPED',
            'media_container_type': types_by_tab[tab_type],
        }

        page_items = self._call_api(
            'media-containers', video_id=tab_id,
            description=f'{tab_type} tab page {page + 1}', query=api_query)

        for item in traverse_obj(page_items, (..., {dict})):
            # Formats are left for NuumMediaIE to resolve per entry
            info = self._parse_video_data(item, extract_formats=False)
            entry_url = f'https://nuum.ru/videos/{info["id"]}'
            yield self.url_result(entry_url, NuumMediaIE, **info)

    def _real_extract(self, url):
        """Return a lazily paged playlist for the requested channel tab."""
        mobj = self._match_valid_url(url)
        channel_name, tab_type = mobj.group('id', 'type')
        tab_id = f'{channel_name}_{tab_type}'

        channel = self._get_channel_info(channel_name)['channel']

        fetch_page = functools.partial(
            self._fetch_page, channel['channel_id'], tab_type, tab_id)
        entries = OnDemandPagedList(fetch_page, self._PAGE_SIZE)

        return self.playlist_result(
            entries, playlist_id=tab_id, playlist_title=channel.get('channel_name'))
|
@ -1,50 +1,93 @@
|
||||
import hmac
|
||||
import hashlib
|
||||
import base64
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_elements_html_by_class,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
remove_start,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NYTimesBaseIE(InfoExtractor):
|
||||
_SECRET = b'pX(2MbU2);4N{7J8)>YwKRJ+/pQ3JkiU2Q^V>mFYv6g6gYvt6v'
|
||||
_DNS_NAMESPACE = uuid.UUID('36dd619a-56dc-595b-9e09-37f4152c7b5d')
|
||||
_TOKEN = 'MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAuNIzKBOFB77aT/jN/FQ+/QVKWq5V1ka1AYmCR9hstz1pGNPH5ajOU9gAqta0T89iPnhjwla+3oec/Z3kGjxbpv6miQXufHFq3u2RC6HyU458cLat5kVPSOQCe3VVB5NRpOlRuwKHqn0txfxnwSSj8mqzstR997d3gKB//RO9zE16y3PoWlDQXkASngNJEWvL19iob/xwAkfEWCjyRILWFY0JYX3AvLMSbq7wsqOCE5srJpo7rRU32zsByhsp1D5W9OYqqwDmflsgCEQy2vqTsJjrJohuNg+urMXNNZ7Y3naMoqttsGDrWVxtPBafKMI8pM2ReNZBbGQsQXRzQNo7+QIDAQAB'
|
||||
_GRAPHQL_API = 'https://samizdat-graphql.nytimes.com/graphql/v2'
|
||||
_GRAPHQL_QUERY = '''query VideoQuery($id: String!) {
|
||||
video(id: $id) {
|
||||
... on Video {
|
||||
bylines {
|
||||
renderedRepresentation
|
||||
}
|
||||
duration
|
||||
firstPublished
|
||||
promotionalHeadline
|
||||
promotionalMedia {
|
||||
... on Image {
|
||||
crops {
|
||||
name
|
||||
renditions {
|
||||
name
|
||||
width
|
||||
height
|
||||
url
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
renditions {
|
||||
type
|
||||
width
|
||||
height
|
||||
url
|
||||
bitrate
|
||||
}
|
||||
summary
|
||||
}
|
||||
}
|
||||
}'''
|
||||
|
||||
def _extract_video_from_id(self, video_id):
|
||||
# Authorization generation algorithm is reverse engineered from `signer` in
|
||||
# http://graphics8.nytimes.com/video/vhs/vhs-2.x.min.js
|
||||
path = '/svc/video/api/v3/video/' + video_id
|
||||
hm = hmac.new(self._SECRET, (path + ':vhs').encode(), hashlib.sha512).hexdigest()
|
||||
video_data = self._download_json('http://www.nytimes.com' + path, video_id, 'Downloading video JSON', headers={
|
||||
'Authorization': 'NYTV ' + base64.b64encode(hm.encode()).decode(),
|
||||
'X-NYTV': 'vhs',
|
||||
}, fatal=False)
|
||||
if not video_data:
|
||||
video_data = self._download_json(
|
||||
'http://www.nytimes.com/svc/video/api/v2/video/' + video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
def _call_api(self, media_id):
|
||||
# reference: `id-to-uri.js`
|
||||
video_uuid = uuid.uuid5(self._DNS_NAMESPACE, 'video')
|
||||
media_uuid = uuid.uuid5(video_uuid, media_id)
|
||||
|
||||
title = video_data['headline']
|
||||
return traverse_obj(self._download_json(
|
||||
self._GRAPHQL_API, media_id, 'Downloading JSON from GraphQL API', data=json.dumps({
|
||||
'query': self._GRAPHQL_QUERY,
|
||||
'variables': {'id': f'nyt://video/{media_uuid}'},
|
||||
}, separators=(',', ':')).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Nyt-App-Type': 'vhs',
|
||||
'Nyt-App-Version': 'v3.52.21',
|
||||
'Nyt-Token': self._TOKEN,
|
||||
'Origin': 'https://nytimes.com',
|
||||
}, fatal=False), ('data', 'video', {dict})) or {}
|
||||
|
||||
def get_file_size(file_size):
|
||||
if isinstance(file_size, int):
|
||||
return file_size
|
||||
elif isinstance(file_size, dict):
|
||||
return int(file_size.get('value', 0))
|
||||
else:
|
||||
return None
|
||||
def _extract_thumbnails(self, thumbs):
|
||||
return traverse_obj(thumbs, (lambda _, v: url_or_none(v['url']), {
|
||||
'url': 'url',
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}), default=None)
|
||||
|
||||
def _extract_formats_and_subtitles(self, video_id, content_media_json):
|
||||
urls = []
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for video in video_data.get('renditions', []):
|
||||
for video in traverse_obj(content_media_json, ('renditions', ..., {dict})):
|
||||
video_url = video.get('url')
|
||||
format_id = video.get('type')
|
||||
if not video_url or format_id == 'thumbs' or video_url in urls:
|
||||
@ -56,11 +99,9 @@ class NYTimesBaseIE(InfoExtractor):
|
||||
video_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=format_id or 'hls', fatal=False)
|
||||
formats.extend(m3u8_fmts)
|
||||
subtitles = self._merge_subtitles(subtitles, m3u8_subs)
|
||||
self._merge_subtitles(m3u8_subs, target=subtitles)
|
||||
elif ext == 'mpd':
|
||||
continue
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# video_url, video_id, format_id or 'dash', fatal=False))
|
||||
continue # all mpd urls give 404 errors
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
@ -68,55 +109,50 @@ class NYTimesBaseIE(InfoExtractor):
|
||||
'vcodec': video.get('videoencoding') or video.get('video_codec'),
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
'filesize': get_file_size(video.get('file_size') or video.get('fileSize')),
|
||||
'filesize': traverse_obj(video, (
|
||||
('file_size', 'fileSize'), (None, ('value')), {int_or_none}), get_all=False),
|
||||
'tbr': int_or_none(video.get('bitrate'), 1000) or None,
|
||||
'ext': ext,
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
for image in video_data.get('images', []):
|
||||
image_url = image.get('url')
|
||||
if not image_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': 'http://www.nytimes.com/' + image_url,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
})
|
||||
return formats, subtitles
|
||||
|
||||
publication_date = video_data.get('publication_date')
|
||||
timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None
|
||||
def _extract_video(self, media_id):
|
||||
data = self._call_api(media_id)
|
||||
formats, subtitles = self._extract_formats_and_subtitles(media_id, data)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('summary'),
|
||||
'timestamp': timestamp,
|
||||
'uploader': video_data.get('byline'),
|
||||
'duration': float_or_none(video_data.get('duration'), 1000),
|
||||
'id': media_id,
|
||||
'title': data.get('promotionalHeadline'),
|
||||
'description': data.get('summary'),
|
||||
'timestamp': parse_iso8601(data.get('firstPublished')),
|
||||
'duration': float_or_none(data.get('duration'), scale=1000),
|
||||
'creator': ', '.join(traverse_obj(data, ( # TODO: change to 'creators'
|
||||
'bylines', ..., 'renderedRepresentation', {lambda x: remove_start(x, 'By ')}))),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnails': thumbnails,
|
||||
'thumbnails': self._extract_thumbnails(
|
||||
traverse_obj(data, ('promotionalMedia', 'crops', ..., 'renditions', ...))),
|
||||
}
|
||||
|
||||
|
||||
class NYTimesIE(NYTimesBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
|
||||
_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
|
||||
'md5': 'd665342765db043f7e225cff19df0f2d',
|
||||
'md5': 'a553aa344014e3723d33893d89d4defc',
|
||||
'info_dict': {
|
||||
'id': '100000002847155',
|
||||
'ext': 'mov',
|
||||
'ext': 'mp4',
|
||||
'title': 'Verbatim: What Is a Photocopier?',
|
||||
'description': 'md5:93603dada88ddbda9395632fdc5da260',
|
||||
'timestamp': 1398631707,
|
||||
'upload_date': '20140427',
|
||||
'uploader': 'Brett Weiner',
|
||||
'timestamp': 1398646132,
|
||||
'upload_date': '20140428',
|
||||
'creator': 'Brett Weiner',
|
||||
'thumbnail': r're:https?://\w+\.nyt.com/images/.+\.jpg',
|
||||
'duration': 419,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
|
||||
'only_matching': True,
|
||||
@ -125,138 +161,260 @@ class NYTimesIE(NYTimesBaseIE):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
return self._extract_video_from_id(video_id)
|
||||
return self._extract_video(video_id)
|
||||
|
||||
|
||||
class NYTimesArticleIE(NYTimesBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?nytimes\.com/(.(?<!video))*?/(?:[^/]+/)*(?P<id>[^.]+)(?:\.html)?'
|
||||
_VALID_URL = r'https?://(?:www\.)?nytimes\.com/\d{4}/\d{2}/\d{2}/(?!books|podcasts)[^/?#]+/(?:\w+/)?(?P<id>[^./?#]+)(?:\.html)?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0',
|
||||
'md5': 'e2076d58b4da18e6a001d53fd56db3c9',
|
||||
'md5': '3eb5ddb1d6f86254fe4f233826778737',
|
||||
'info_dict': {
|
||||
'id': '100000003628438',
|
||||
'ext': 'mov',
|
||||
'title': 'New Minimum Wage: $70,000 a Year',
|
||||
'description': 'Dan Price, C.E.O. of Gravity Payments, surprised his 120-person staff by announcing that he planned over the next three years to raise the salary of every employee to $70,000 a year.',
|
||||
'timestamp': 1429033037,
|
||||
'ext': 'mp4',
|
||||
'title': 'One Company’s New Minimum Wage: $70,000 a Year',
|
||||
'description': 'md5:89ba9ab67ca767bb92bf823d1f138433',
|
||||
'timestamp': 1429047468,
|
||||
'upload_date': '20150414',
|
||||
'uploader': 'Matthew Williams',
|
||||
}
|
||||
'creator': 'Patricia Cohen',
|
||||
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
|
||||
'duration': 119.0,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nytimes.com/2016/10/14/podcasts/revelations-from-the-final-weeks.html',
|
||||
'md5': 'e0d52040cafb07662acf3c9132db3575',
|
||||
# article with audio and no video
|
||||
'url': 'https://www.nytimes.com/2023/09/29/health/mosquitoes-genetic-engineering.html',
|
||||
'md5': '2365b3555c8aa7f4dd34ca735ad02e6a',
|
||||
'info_dict': {
|
||||
'id': '100000004709062',
|
||||
'title': 'The Run-Up: ‘He Was Like an Octopus’',
|
||||
'id': '100000009110381',
|
||||
'ext': 'mp3',
|
||||
'description': 'md5:fb5c6b93b12efc51649b4847fe066ee4',
|
||||
'series': 'The Run-Up',
|
||||
'episode': '‘He Was Like an Octopus’',
|
||||
'episode_number': 20,
|
||||
'duration': 2130,
|
||||
}
|
||||
'title': 'The Gamble: Can Genetically Modified Mosquitoes End Disease?',
|
||||
'description': 'md5:9ff8b47acbaf7f3ca8c732f5c815be2e',
|
||||
'timestamp': 1695960700,
|
||||
'upload_date': '20230929',
|
||||
'creator': 'Stephanie Nolen, Natalija Gormalova',
|
||||
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
|
||||
'duration': 1322,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nytimes.com/2016/10/16/books/review/inside-the-new-york-times-book-review-the-rise-of-hitler.html',
|
||||
'url': 'https://www.nytimes.com/2023/11/29/business/dealbook/kamala-harris-biden-voters.html',
|
||||
'md5': '3eb5ddb1d6f86254fe4f233826778737',
|
||||
'info_dict': {
|
||||
'id': '100000004709479',
|
||||
'title': 'The Rise of Hitler',
|
||||
'ext': 'mp3',
|
||||
'description': 'md5:bce877fd9e3444990cb141875fab0028',
|
||||
'creator': 'Pamela Paul',
|
||||
'duration': 3475,
|
||||
'id': '100000009202270',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kamala Harris Defends Biden Policies, but Says ‘More Work’ Needed to Reach Voters',
|
||||
'description': 'md5:de4212a7e19bb89e4fb14210ca915f1f',
|
||||
'timestamp': 1701290997,
|
||||
'upload_date': '20231129',
|
||||
'uploader': 'By The New York Times',
|
||||
'creator': 'Katie Rogers',
|
||||
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
|
||||
'duration': 97.631,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1',
|
||||
# multiple videos in the same article
|
||||
'url': 'https://www.nytimes.com/2023/12/02/business/air-traffic-controllers-safety.html',
|
||||
'info_dict': {
|
||||
'id': 'air-traffic-controllers-safety',
|
||||
'title': 'Drunk and Asleep on the Job: Air Traffic Controllers Pushed to the Brink',
|
||||
'description': 'md5:549e5a5e935bf7d048be53ba3d2c863d',
|
||||
'upload_date': '20231202',
|
||||
'creator': 'Emily Steel, Sydney Ember',
|
||||
'timestamp': 1701511264,
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://www.nytimes.com/2023/12/02/business/media/netflix-squid-game-challenge.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_podcast_from_json(self, json, page_id, webpage):
|
||||
podcast_audio = self._parse_json(
|
||||
json, page_id, transform_source=js_to_json)
|
||||
def _extract_content_from_block(self, block):
|
||||
details = traverse_obj(block, {
|
||||
'id': ('sourceId', {str}),
|
||||
'uploader': ('bylines', ..., 'renderedRepresentation', {str}),
|
||||
'duration': (None, (('duration', {lambda x: float_or_none(x, scale=1000)}), ('length', {int_or_none}))),
|
||||
'timestamp': ('firstPublished', {parse_iso8601}),
|
||||
'series': ('podcastSeries', {str}),
|
||||
}, get_all=False)
|
||||
|
||||
audio_data = podcast_audio['data']
|
||||
track = audio_data['track']
|
||||
|
||||
episode_title = track['title']
|
||||
video_url = track['source']
|
||||
|
||||
description = track.get('description') or self._html_search_meta(
|
||||
['og:description', 'twitter:description'], webpage)
|
||||
|
||||
podcast_title = audio_data.get('podcast', {}).get('title')
|
||||
title = ('%s: %s' % (podcast_title, episode_title)
|
||||
if podcast_title else episode_title)
|
||||
|
||||
episode = audio_data.get('podcast', {}).get('episode') or ''
|
||||
episode_number = int_or_none(self._search_regex(
|
||||
r'[Ee]pisode\s+(\d+)', episode, 'episode number', default=None))
|
||||
formats, subtitles = self._extract_formats_and_subtitles(details.get('id'), block)
|
||||
# audio articles will have an url and no formats
|
||||
url = traverse_obj(block, ('fileUrl', {url_or_none}))
|
||||
if not formats and url:
|
||||
formats.append({'url': url, 'vcodec': 'none'})
|
||||
|
||||
return {
|
||||
'id': remove_start(podcast_audio.get('target'), 'FT') or page_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'creator': track.get('credit'),
|
||||
'series': podcast_title,
|
||||
'episode': episode_title,
|
||||
'episode_number': episode_number,
|
||||
'duration': int_or_none(track.get('duration')),
|
||||
**details,
|
||||
'thumbnails': self._extract_thumbnails(traverse_obj(
|
||||
block, ('promotionalMedia', 'crops', ..., 'renditions', ...))),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
art_json = self._search_json(
|
||||
r'window\.__preloadedData\s*=', webpage, 'media details', page_id,
|
||||
transform_source=lambda x: x.replace('undefined', 'null'))['initialData']['data']['article']
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-videoid=["\'](\d+)', webpage, 'video id',
|
||||
default=None, fatal=False)
|
||||
if video_id is not None:
|
||||
return self._extract_video_from_id(video_id)
|
||||
blocks = traverse_obj(art_json, (
|
||||
'sprinkledBody', 'content', ..., ('ledeMedia', None),
|
||||
lambda _, v: v['__typename'] in ('Video', 'Audio')))
|
||||
if not blocks:
|
||||
raise ExtractorError('Unable to extract any media blocks from webpage')
|
||||
|
||||
podcast_data = self._search_regex(
|
||||
(r'NYTD\.FlexTypes\.push\s*\(\s*({.+?})\s*\)\s*;\s*</script',
|
||||
r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'),
|
||||
webpage, 'podcast data')
|
||||
return self._extract_podcast_from_json(podcast_data, page_id, webpage)
|
||||
common_info = {
|
||||
'title': remove_end(self._html_extract_title(webpage), ' - The New York Times'),
|
||||
'description': traverse_obj(art_json, (
|
||||
'sprinkledBody', 'content', ..., 'summary', 'content', ..., 'text', {str}),
|
||||
get_all=False) or self._html_search_meta(['og:description', 'twitter:description'], webpage),
|
||||
'timestamp': traverse_obj(art_json, ('firstPublished', {parse_iso8601})),
|
||||
'creator': ', '.join(
|
||||
traverse_obj(art_json, ('bylines', ..., 'creators', ..., 'displayName'))), # TODO: change to 'creators' (list)
|
||||
'thumbnails': self._extract_thumbnails(traverse_obj(
|
||||
art_json, ('promotionalMedia', 'assetCrops', ..., 'renditions', ...))),
|
||||
}
|
||||
|
||||
entries = []
|
||||
for block in blocks:
|
||||
entries.append(merge_dicts(self._extract_content_from_block(block), common_info))
|
||||
|
||||
if len(entries) > 1:
|
||||
return self.playlist_result(entries, page_id, **common_info)
|
||||
|
||||
return {
|
||||
'id': page_id,
|
||||
**entries[0],
|
||||
}
|
||||
|
||||
|
||||
class NYTimesCookingIE(NYTimesBaseIE):
|
||||
_VALID_URL = r'https?://cooking\.nytimes\.com/(?:guid|recip)es/(?P<id>\d+)'
|
||||
IE_NAME = 'NYTimesCookingGuide'
|
||||
_VALID_URL = r'https?://cooking\.nytimes\.com/guides/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart',
|
||||
'md5': 'dab81fa2eaeb3f9ed47498bdcfcdc1d3',
|
||||
'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey',
|
||||
'info_dict': {
|
||||
'id': '100000004756089',
|
||||
'ext': 'mov',
|
||||
'timestamp': 1479383008,
|
||||
'uploader': 'By SHAW LASH, ADAM SAEWITZ and JAMES HERRON',
|
||||
'title': 'Cranberry Tart',
|
||||
'upload_date': '20161117',
|
||||
'description': 'If you are a fan of lemon curd or the classic French tarte au citron, you will love this cranberry version.',
|
||||
'id': '13-how-to-cook-a-turkey',
|
||||
'title': 'How to Cook a Turkey',
|
||||
'description': 'md5:726cfd3f9b161bdf5c279879e8050ca0',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
# single video example
|
||||
'url': 'https://cooking.nytimes.com/guides/50-how-to-make-mac-and-cheese',
|
||||
'md5': '64415805fe0b8640fce6b0b9def5989a',
|
||||
'info_dict': {
|
||||
'id': '100000005835845',
|
||||
'ext': 'mp4',
|
||||
'title': 'How to Make Mac and Cheese',
|
||||
'description': 'md5:b8f2f33ec1fb7523b21367147c9594f1',
|
||||
'timestamp': 1522950315,
|
||||
'upload_date': '20180405',
|
||||
'duration': 9.51,
|
||||
'creator': 'Alison Roman',
|
||||
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey',
|
||||
'md5': '4b2e8c70530a89b8d905a2b572316eb8',
|
||||
'url': 'https://cooking.nytimes.com/guides/20-how-to-frost-a-cake',
|
||||
'md5': '64415805fe0b8640fce6b0b9def5989a',
|
||||
'info_dict': {
|
||||
'id': '100000003951728',
|
||||
'ext': 'mov',
|
||||
'timestamp': 1445509539,
|
||||
'description': 'Turkey guide',
|
||||
'upload_date': '20151022',
|
||||
'title': 'Turkey',
|
||||
}
|
||||
'id': '20-how-to-frost-a-cake',
|
||||
'title': 'How to Frost a Cake',
|
||||
'description': 'md5:a31fe3b98a8ce7b98aae097730c269cd',
|
||||
},
|
||||
'playlist_count': 8,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
|
||||
description = self._html_search_meta(['og:description', 'twitter:description'], webpage)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-video-id=["\'](\d+)', webpage, 'video id')
|
||||
lead_video_id = self._search_regex(
|
||||
r'data-video-player-id="(\d+)"></div>', webpage, 'lead video')
|
||||
media_ids = traverse_obj(
|
||||
get_elements_html_by_class('video-item', webpage), (..., {extract_attributes}, 'data-video-id'))
|
||||
|
||||
return self._extract_video_from_id(video_id)
|
||||
if media_ids:
|
||||
media_ids.append(lead_video_id)
|
||||
return self.playlist_result(
|
||||
[self._extract_video(media_id) for media_id in media_ids], page_id, title, description)
|
||||
|
||||
return {
|
||||
**self._extract_video(lead_video_id),
|
||||
'title': title,
|
||||
'description': description,
|
||||
'creator': self._search_regex( # TODO: change to 'creators'
|
||||
r'<span itemprop="author">([^<]+)</span></p>', webpage, 'author', default=None),
|
||||
}
|
||||
|
||||
|
||||
class NYTimesCookingRecipeIE(InfoExtractor):
    # Extractor for single recipe pages on cooking.nytimes.com; the numeric part of the
    # URL slug is matched as the id.
    _VALID_URL = r'https?://cooking\.nytimes\.com/recipes/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart',
        'md5': '579e83bbe8e61e9de67f80edba8a78a8',
        'info_dict': {
            'id': '1017817',
            'ext': 'mp4',
            'title': 'Cranberry Curd Tart',
            'description': 'md5:ad77a3fc321db636256d4343c5742152',
            'timestamp': 1447804800,
            'upload_date': '20151118',
            'creator': 'David Tanis',
            'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
        },
    }, {
        'url': 'https://cooking.nytimes.com/recipes/1024781-neapolitan-checkerboard-cookies',
        'md5': '58df35998241dcf0620e99e646331b42',
        'info_dict': {
            'id': '1024781',
            'ext': 'mp4',
            'title': 'Neapolitan Checkerboard Cookies',
            'description': 'md5:ba12394c585ababea951cb6d2fcc6631',
            'timestamp': 1701302400,
            'upload_date': '20231130',
            'creator': 'Sue Li',
            'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
        },
    }, {
        'url': 'https://cooking.nytimes.com/recipes/1019516-overnight-oats',
        'md5': '2fe7965a3adc899913b8e25ada360823',
        'info_dict': {
            'id': '1019516',
            'ext': 'mp4',
            'timestamp': 1546387200,
            'description': 'md5:8856ce10239161bd2596ac335b9f9bfb',
            'upload_date': '20190102',
            'title': 'Overnight Oats',
            'creator': 'Genevieve Ko',
            'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a recipe page from its embedded Next.js data.

        The recipe's 'videoSrc' is treated as an HLS manifest; a missing
        'recipe' or 'videoSrc' key raises KeyError.
        """
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)
        nextjs_data = self._search_nextjs_data(webpage, page_id)
        recipe = nextjs_data['props']['pageProps']['recipe']

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            recipe['videoSrc'], page_id, 'mp4', m3u8_id='hls')

        # Metadata fields copied straight from the recipe object
        metadata = traverse_obj(recipe, {
            'id': ('id', {str_or_none}),
            'title': ('title', {str}),
            'description': ('topnote', {clean_html}),
            'timestamp': ('publishedAt', {int_or_none}),
            'creator': ('contentAttribution', 'cardByline', {str}),
        })
        # One thumbnail entry per valid URL found under image -> crops -> recipe
        crop_urls = traverse_obj(recipe, ('image', 'crops', 'recipe', ..., {url_or_none}))
        thumbnails = [{'url': crop_url} for crop_url in crop_urls]

        return {
            **metadata,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
        }
|
||||
|
@ -1,4 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
from .jwplatform import JWPlatformIE
|
||||
from ..utils import make_archive_id
|
||||
|
||||
|
||||
class OneFootballIE(InfoExtractor):
|
||||
@ -7,41 +9,43 @@ class OneFootballIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'https://onefootball.com/en/video/highlights-fc-zuerich-3-3-fc-basel-34012334',
|
||||
'info_dict': {
|
||||
'id': '34012334',
|
||||
'id': 'Y2VtcWAT',
|
||||
'ext': 'mp4',
|
||||
'title': 'Highlights: FC Zürich 3-3 FC Basel',
|
||||
'description': 'md5:33d9855cb790702c4fe42a513700aba8',
|
||||
'thumbnail': 'https://photobooth-api.onefootball.com/api/screenshot/https:%2F%2Fperegrine-api.onefootball.com%2Fv2%2Fphotobooth%2Fcms%2Fen%2F34012334',
|
||||
'timestamp': 1635874604,
|
||||
'upload_date': '20211102'
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/Y2VtcWAT/poster.jpg?width=720',
|
||||
'timestamp': 1635874895,
|
||||
'upload_date': '20211102',
|
||||
'duration': 375.0,
|
||||
'tags': ['Football', 'Soccer', 'OneFootball'],
|
||||
'_old_archive_ids': ['onefootball 34012334'],
|
||||
},
|
||||
'params': {'skip_download': True}
|
||||
'params': {'skip_download': True},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'https://onefootball.com/en/video/klopp-fumes-at-var-decisions-in-west-ham-defeat-34041020',
|
||||
'info_dict': {
|
||||
'id': '34041020',
|
||||
'id': 'leVJrMho',
|
||||
'ext': 'mp4',
|
||||
'title': 'Klopp fumes at VAR decisions in West Ham defeat',
|
||||
'description': 'md5:9c50371095a01ad3f63311c73d8f51a5',
|
||||
'thumbnail': 'https://photobooth-api.onefootball.com/api/screenshot/https:%2F%2Fperegrine-api.onefootball.com%2Fv2%2Fphotobooth%2Fcms%2Fen%2F34041020',
|
||||
'timestamp': 1636314103,
|
||||
'upload_date': '20211107'
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/leVJrMho/poster.jpg?width=720',
|
||||
'timestamp': 1636315232,
|
||||
'upload_date': '20211107',
|
||||
'duration': 93.0,
|
||||
'tags': ['Football', 'Soccer', 'OneFootball'],
|
||||
'_old_archive_ids': ['onefootball 34041020'],
|
||||
},
|
||||
'params': {'skip_download': True}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, id)
|
||||
data_json = self._search_json_ld(webpage, id)
|
||||
m3u8_url = self._html_search_regex(r'(https://cdn\.jwplayer\.com/manifests/.+\.m3u8)', webpage, 'm3u8_url')
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id)
|
||||
return {
|
||||
'id': id,
|
||||
'title': data_json.get('title'),
|
||||
'description': data_json.get('description'),
|
||||
'thumbnail': data_json.get('thumbnail'),
|
||||
'timestamp': data_json.get('timestamp'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data_json = self._search_json_ld(webpage, video_id, fatal=False)
|
||||
data_json.pop('url', None)
|
||||
m3u8_url = self._html_search_regex(r'(https://cdn\.jwplayer\.com/manifests/\w+\.m3u8)', webpage, 'm3u8_url')
|
||||
|
||||
return self.url_result(
|
||||
m3u8_url, JWPlatformIE, video_id, _old_archive_ids=[make_archive_id(self, video_id)],
|
||||
**data_json, url_transparent=True)
|
||||
|
@ -12,6 +12,8 @@ from ..compat import compat_str
|
||||
|
||||
|
||||
class OpenRecBaseIE(InfoExtractor):
|
||||
_M3U8_HEADERS = {'Referer': 'https://www.openrec.tv/'}
|
||||
|
||||
def _extract_pagestore(self, webpage, video_id):
|
||||
return self._parse_json(
|
||||
self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id)
|
||||
@ -21,7 +23,7 @@ class OpenRecBaseIE(InfoExtractor):
|
||||
if not m3u8_url:
|
||||
continue
|
||||
yield from self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', m3u8_id=name)
|
||||
m3u8_url, video_id, ext='mp4', m3u8_id=name, headers=self._M3U8_HEADERS)
|
||||
|
||||
def _extract_movie(self, webpage, video_id, name, is_live):
|
||||
window_stores = self._extract_pagestore(webpage, video_id)
|
||||
@ -60,6 +62,7 @@ class OpenRecBaseIE(InfoExtractor):
|
||||
'uploader_id': get_first(movie_stores, ('channel', 'user', 'id')),
|
||||
'timestamp': int_or_none(get_first(movie_stores, ['publishedAt', 'time']), scale=1000) or unified_timestamp(get_first(movie_stores, 'publishedAt')),
|
||||
'is_live': is_live,
|
||||
'http_headers': self._M3U8_HEADERS,
|
||||
}
|
||||
|
||||
|
||||
@ -110,7 +113,7 @@ class OpenRecCaptureIE(OpenRecBaseIE):
|
||||
raise ExtractorError('Cannot extract title')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
capture_data.get('source'), video_id, ext='mp4')
|
||||
capture_data.get('source'), video_id, ext='mp4', headers=self._M3U8_HEADERS)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@ -121,6 +124,7 @@ class OpenRecCaptureIE(OpenRecBaseIE):
|
||||
'uploader': traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str),
|
||||
'uploader_id': traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str),
|
||||
'upload_date': unified_strdate(capture_data.get('createdAt')),
|
||||
'http_headers': self._M3U8_HEADERS,
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
import base64
|
||||
import functools
|
||||
import re
|
||||
|
||||
@ -565,3 +566,66 @@ class ORFFM4StoryIE(InfoExtractor):
|
||||
})
|
||||
|
||||
return self.playlist_result(entries)
|
||||
|
||||
|
||||
class ORFONIE(InfoExtractor):
    # Extractor for on.orf.at video pages; the URL carries an 8-digit id and a slug.
    IE_NAME = 'orf:on'
    _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d{8})/(?P<slug>[\w-]+)'
    _TESTS = [{
        'url': 'https://on.orf.at/video/14210000/school-of-champions-48',
        'info_dict': {
            'id': '14210000',
            'ext': 'mp4',
            'duration': 2651.08,
            'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0167/98/thumb_16697671_segments_highlight_teaser.jpeg',
            'title': 'School of Champions (4/8)',
            'description': 'md5:d09ad279fc2e8502611e7648484b6afd',
            'media_type': 'episode',
            'timestamp': 1706472362,
            'upload_date': '20240128',
        }
    }]

    def _extract_video(self, video_id, display_id):
        """Query the episode API and return id, formats, subtitles and basic metadata.

        Only 'hls' and 'dash' source groups are turned into formats; any other
        source type in the API response is ignored.
        """
        # The endpoint is addressed by the base64 of a fixed prefix followed by the id
        token_source = f'3dSlfek03nsLKdj4Jsd{video_id}'
        encrypted_id = base64.b64encode(token_source.encode()).decode()
        api_json = self._download_json(
            f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id)

        formats, subtitles = [], {}
        for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)):
            if manifest_type not in ('hls', 'dash'):
                continue
            manifest_urls = traverse_obj(
                api_json, ('sources', manifest_type, ..., 'src', {url_or_none}))
            for manifest_url in manifest_urls:
                if manifest_type == 'hls':
                    fmts, subs = self._extract_m3u8_formats_and_subtitles(
                        manifest_url, display_id, fatal=False, m3u8_id='hls')
                else:
                    fmts, subs = self._extract_mpd_formats_and_subtitles(
                        manifest_url, display_id, fatal=False, mpd_id='dash')
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)

        # get_all=False: for the alternative keys, take the first one that yields a value
        metadata = traverse_obj(api_json, {
            'duration': ('duration_second', {float_or_none}),
            'title': (('title', 'headline'), {str}),
            'description': (('description', 'teaser_text'), {str}),
            'media_type': ('video_type', {str}),
        }, get_all=False)

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **metadata,
        }

    def _real_extract(self, url):
        """Combine page meta tags, JSON-LD and API data; later sources override earlier ones."""
        video_id, display_id = self._match_valid_url(url).group('id', 'slug')
        webpage = self._download_webpage(url, display_id)

        info = {
            'id': video_id,
            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
            'description': self._html_search_meta(
                ['description', 'og:description', 'twitter:description'], webpage, default=None),
        }
        # Precedence (lowest to highest): meta tags, JSON-LD, API response
        info.update(self._search_json_ld(webpage, display_id, fatal=False))
        info.update(self._extract_video(video_id, display_id))
        return info
|
||||
|
@ -275,7 +275,7 @@ class PatreonIE(PatreonBaseIE):
|
||||
'ext': ext,
|
||||
'url': post_file['url'],
|
||||
}
|
||||
elif name == 'video':
|
||||
elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
|
||||
return {
|
||||
**info,
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
class PiaproIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'piapro'
|
||||
_VALID_URL = r'https?://piapro\.jp/(?:t|content)/(?P<id>\w+)/?'
|
||||
_VALID_URL = r'https?://piapro\.jp/(?:t|content)/(?P<id>[\w-]+)/?'
|
||||
_TESTS = [{
|
||||
'url': 'https://piapro.jp/t/NXYR',
|
||||
'md5': 'f7c0f760913fb1d44a1c45a4af793909',
|
||||
@ -49,6 +49,9 @@ class PiaproIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://piapro.jp/content/hcw0z3a169wtemz6',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://piapro.jp/t/-SO-',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
_login_status = False
|
||||
|
@ -1,10 +1,18 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, traverse_obj
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class PlaySuisseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'playsuisse'
|
||||
_VALID_URL = r'https?://(?:www\.)?playsuisse\.ch/(?:watch|detail)/(?:[^#]*[?&]episodeId=)?(?P<id>[0-9]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
@ -134,12 +142,47 @@ class PlaySuisseIE(InfoExtractor):
|
||||
id
|
||||
url
|
||||
}'''
|
||||
_LOGIN_BASE_URL = 'https://login.srgssr.ch/srgssrlogin.onmicrosoft.com'
|
||||
_LOGIN_PATH = 'B2C_1A__SignInV2'
|
||||
_ID_TOKEN = None
|
||||
|
||||
def _perform_login(self, username, password):
    """Log in through the SRG SSR login flow and store the resulting ID token.

    Raises ExtractorError when the credentials are rejected (status 400 in the
    response body) or when no id_token is present in the final URL.
    """
    login_page = self._download_webpage(
        'https://www.playsuisse.ch/api/sso/login', None, note='Downloading login page',
        query={'x': 'x', 'locale': 'de', 'redirectUrl': 'https://www.playsuisse.ch/'})
    settings = self._search_json(r'var\s+SETTINGS\s*=', login_page, 'settings', None)

    csrf_token = settings['csrf']
    query = {'tx': settings['transId'], 'p': self._LOGIN_PATH}
    login_endpoint = f'{self._LOGIN_BASE_URL}/{self._LOGIN_PATH}'

    credentials = urlencode_postdata({
        'request_type': 'RESPONSE',
        'signInName': username,
        'password': password
    })
    # HTTP 400 is accepted here so that the status in the JSON body can be inspected
    login_response = self._download_json(
        f'{login_endpoint}/SelfAsserted', None, 'Logging in',
        query=query, headers={'X-CSRF-TOKEN': csrf_token}, data=credentials,
        expected_status=400)
    status = traverse_obj(login_response, ('status', {int_or_none}))
    if status == 400:
        raise ExtractorError('Invalid username or password', expected=True)

    confirm_query = {
        'rememberMe': 'false',
        'csrf_token': csrf_token,
        **query,
        'diags': '',
    }
    urlh = self._request_webpage(
        f'{login_endpoint}/api/CombinedSigninAndSignup/confirmed',
        None, 'Downloading ID token', query=confirm_query)

    # The token is read from the query string of the final URL of that request
    id_token = traverse_obj(parse_qs(urlh.url), ('id_token', 0))
    self._ID_TOKEN = id_token
    if not id_token:
        raise ExtractorError('Login failed')
|
||||
|
||||
def _get_media_data(self, media_id):
|
||||
# NOTE In the web app, the "locale" header is used to switch between languages,
|
||||
# However this doesn't seem to take effect when passing the header here.
|
||||
response = self._download_json(
|
||||
'https://4bbepzm4ef.execute-api.eu-central-1.amazonaws.com/prod/graphql',
|
||||
'https://www.playsuisse.ch/api/graphql',
|
||||
media_id, data=json.dumps({
|
||||
'operationName': 'AssetWatch',
|
||||
'query': self._GRAPHQL_QUERY,
|
||||
@ -150,6 +193,9 @@ class PlaySuisseIE(InfoExtractor):
|
||||
return response['data']['assetV2']
|
||||
|
||||
def _real_extract(self, url):
|
||||
if not self._ID_TOKEN:
|
||||
self.raise_login_required(method='password')
|
||||
|
||||
media_id = self._match_id(url)
|
||||
media_data = self._get_media_data(media_id)
|
||||
info = self._extract_single(media_data)
|
||||
@ -168,7 +214,8 @@ class PlaySuisseIE(InfoExtractor):
|
||||
if not media.get('url') or media.get('type') != 'HLS':
|
||||
continue
|
||||
f, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
media['url'], media_data['id'], 'mp4', m3u8_id='HLS', fatal=False)
|
||||
update_url_query(media['url'], {'id_token': self._ID_TOKEN}),
|
||||
media_data['id'], 'mp4', m3u8_id='HLS', fatal=False)
|
||||
formats.extend(f)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
|
@ -87,8 +87,8 @@ class PornHubBaseIE(InfoExtractor):
|
||||
|
||||
def is_logged(webpage):
|
||||
return any(re.search(p, webpage) for p in (
|
||||
r'class=["\']signOut',
|
||||
r'>Sign\s+[Oo]ut\s*<'))
|
||||
r'id="profileMenuDropdown"',
|
||||
r'class="ph-icon-logout"'))
|
||||
|
||||
if is_logged(login_page):
|
||||
self._logged_in = True
|
||||
|
@ -18,7 +18,6 @@ from ..utils.traversal import traverse_obj
|
||||
class Pr0grammIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://pr0gramm\.com\/(?:[^/?#]+/)+(?P<id>[\d]+)(?:[/?#:]|$)'
|
||||
_TESTS = [{
|
||||
# Tags require account
|
||||
'url': 'https://pr0gramm.com/new/video/5466437',
|
||||
'info_dict': {
|
||||
'id': '5466437',
|
||||
@ -36,7 +35,6 @@ class Pr0grammIE(InfoExtractor):
|
||||
'_old_archive_ids': ['pr0grammstatic 5466437'],
|
||||
},
|
||||
}, {
|
||||
# Tags require account
|
||||
'url': 'https://pr0gramm.com/new/3052805:comment28391322',
|
||||
'info_dict': {
|
||||
'id': '3052805',
|
||||
@ -71,6 +69,23 @@ class Pr0grammIE(InfoExtractor):
|
||||
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
|
||||
'_old_archive_ids': ['pr0grammstatic 5848332'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://pr0gramm.com/top/5895149',
|
||||
'info_dict': {
|
||||
'id': '5895149',
|
||||
'ext': 'mp4',
|
||||
'title': 'pr0gramm-5895149 by algoholigSeeManThrower',
|
||||
'tags': 'count:19',
|
||||
'uploader': 'algoholigSeeManThrower',
|
||||
'uploader_id': 457556,
|
||||
'upload_timestamp': 1697580902,
|
||||
'upload_date': '20231018',
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 0,
|
||||
'thumbnail': 'https://thumb.pr0gramm.com/2023/10/18/db47bb3db5e1a1b3.jpg',
|
||||
'_old_archive_ids': ['pr0grammstatic 5895149'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://pr0gramm.com/static/5466437',
|
||||
'only_matching': True,
|
||||
@ -92,15 +107,15 @@ class Pr0grammIE(InfoExtractor):
|
||||
def _maximum_flags(self):
|
||||
# We need to guess the flags for the content otherwise the api will raise an error
|
||||
# We can guess the maximum allowed flags for the account from the cookies
|
||||
# Bitflags are (msbf): nsfp, nsfl, nsfw, sfw
|
||||
flags = 0b0001
|
||||
# Bitflags are (msbf): pol, nsfp, nsfl, nsfw, sfw
|
||||
flags = 0b10001
|
||||
if self._is_logged_in:
|
||||
flags |= 0b1000
|
||||
flags |= 0b01000
|
||||
cookies = self._get_cookies(self.BASE_URL)
|
||||
if 'me' not in cookies:
|
||||
self._download_webpage(self.BASE_URL, None, 'Refreshing verification information')
|
||||
if traverse_obj(cookies, ('me', {lambda x: x.value}, {unquote}, {json.loads}, 'verified')):
|
||||
flags |= 0b0110
|
||||
flags |= 0b00110
|
||||
|
||||
return flags
|
||||
|
||||
@ -134,14 +149,12 @@ class Pr0grammIE(InfoExtractor):
|
||||
if not source or not source.endswith('mp4'):
|
||||
self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id)
|
||||
|
||||
tags = None
|
||||
if self._is_logged_in:
|
||||
metadata = self._call_api('info', video_id, {'itemId': video_id}, note='Downloading tags')
|
||||
tags = traverse_obj(metadata, ('tags', ..., 'tag', {str}))
|
||||
# Sorted by "confidence", higher confidence = earlier in list
|
||||
confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
|
||||
if confidences:
|
||||
tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]
|
||||
metadata = self._call_api('info', video_id, {'itemId': video_id}, note='Downloading tags')
|
||||
tags = traverse_obj(metadata, ('tags', ..., 'tag', {str}))
|
||||
# Sorted by "confidence", higher confidence = earlier in list
|
||||
confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
|
||||
if confidences:
|
||||
tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]
|
||||
|
||||
formats = traverse_obj(video_info, ('variants', ..., {
|
||||
'format_id': ('name', {str}),
|
||||
|
@ -1,5 +1,8 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601, traverse_obj, try_call
|
||||
from ..utils import float_or_none, parse_iso8601, str_or_none, try_call
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class PrankCastIE(InfoExtractor):
|
||||
@ -64,3 +67,71 @@ class PrankCastIE(InfoExtractor):
|
||||
'categories': [json_info.get('broadcast_category')],
|
||||
'tags': try_call(lambda: json_info['broadcast_tags'].split(','))
|
||||
}
|
||||
|
||||
|
||||
class PrankCastPostIE(InfoExtractor):
    # Extractor for prankcast.com "posts" pages; the URL carries a numeric id followed by a slug.
    _VALID_URL = r'https?://(?:www\.)?prankcast\.com/[^/?#]+/posts/(?P<id>\d+)-(?P<display_id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://prankcast.com/devonanustart/posts/6214-happy-national-rachel-day-',
        'info_dict': {
            'id': '6214',
            'ext': 'mp3',
            'title': 'Happy National Rachel Day!',
            'display_id': 'happy-national-rachel-day-',
            'timestamp': 1704333938,
            'uploader': 'Devonanustart',
            'channel_id': '4',
            'duration': 13175,
            'cast': ['Devonanustart'],
            'description': '',
            'categories': ['prank call'],
            'upload_date': '20240104'
        }
    }, {
        'url': 'https://prankcast.com/despicabledogs/posts/6217-jake-the-work-crow-',
        'info_dict': {
            'id': '6217',
            'ext': 'mp3',
            'title': 'Jake the Work Crow!',
            'display_id': 'jake-the-work-crow-',
            'timestamp': 1704346592,
            'uploader': 'despicabledogs',
            'channel_id': '957',
            'duration': 263.287,
            'cast': ['despicabledogs'],
            'description': 'https://imgur.com/a/vtxLvKU',
            'categories': [],
            'upload_date': '20240104'
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for a post from the page's embedded Next.js data.

        The post's 'post_contents_json' is parsed as JSON and its first element
        supplies the media URL, dates, duration, category and guests.
        """
        video_id, display_id = self._match_valid_url(url).group('id', 'display_id')

        webpage = self._download_webpage(url, video_id)
        post = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['ssr_data_posts']
        content = self._parse_json(post['post_contents_json'], video_id)[0]

        uploader = post.get('user_name')
        # 'guests_json' is itself a JSON string; anything that does not decode to a dict is ignored
        guests_json = traverse_obj(content, ('guests_json', {json.loads}, {dict})) or {}

        return {
            'id': video_id,
            'title': post.get('post_title') or self._og_search_title(webpage),
            'display_id': display_id,
            'url': content.get('url'),
            'timestamp': parse_iso8601(content.get('start_date') or content.get('crdate'), ' '),
            'uploader': uploader,
            'channel_id': str_or_none(post.get('user_id')),
            'duration': float_or_none(content.get('duration')),
            'cast': list(filter(None, [uploader] + traverse_obj(guests_json, (..., 'name')))),
            'description': post.get('post_body'),
            'categories': list(filter(None, [content.get('category')])),
            # filter(None, ...) drops empty entries. The previous predicate was the string '',
            # which is not callable, so building the list raised TypeError and no tags were
            # ever returned (presumably try_call swallowed the error -- confirm).
            'tags': try_call(lambda: list(filter(None, post['post_tags'].split(',')))),
            'subtitles': {
                'live_chat': [{
                    'url': f'https://prankcast.com/api/private/chat/select-broadcast?id={post["content_id"]}&cache=',
                    'ext': 'json',
                }],
            } if post.get('content_id') else None
        }
|
||||
|
@ -1,5 +1,6 @@
|
||||
import base64
|
||||
import random
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
@ -11,6 +12,7 @@ from ..utils import (
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class RadikoBaseIE(InfoExtractor):
|
||||
@ -159,6 +161,12 @@ class RadikoBaseIE(InfoExtractor):
|
||||
|
||||
return formats
|
||||
|
||||
def _extract_performers(self, prog):
    """Return the performers of a programme as one ', '-joined string, or None when empty."""
    # NOTE(review): '/' and ',' each appear twice in the character class below; this looks like
    # fullwidth variants flattened by text normalisation -- confirm against the upstream file
    names = traverse_obj(prog, (
        'pfm/text()', ..., {lambda text: re.split(r'[//、 ,,]', text)}, ..., {str.strip}))
    # TODO: change 'artist' fields to 'artists' and return traversal list instead of str
    joined = ', '.join(names)
    return joined if joined else None
|
||||
|
||||
|
||||
class RadikoIE(RadikoBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
|
||||
@ -186,10 +194,12 @@ class RadikoIE(RadikoBaseIE):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': try_call(lambda: prog.find('title').text),
|
||||
'artist': self._extract_performers(prog),
|
||||
'description': clean_html(try_call(lambda: prog.find('info').text)),
|
||||
'uploader': try_call(lambda: station_program.find('.//name').text),
|
||||
'uploader_id': station,
|
||||
'timestamp': vid_int,
|
||||
'duration': try_call(lambda: unified_timestamp(radio_end, False) - unified_timestamp(radio_begin, False)),
|
||||
'is_live': True,
|
||||
'formats': self._extract_formats(
|
||||
video_id=video_id, station=station, is_onair=False,
|
||||
@ -243,6 +253,7 @@ class RadikoRadioIE(RadikoBaseIE):
|
||||
return {
|
||||
'id': station,
|
||||
'title': title,
|
||||
'artist': self._extract_performers(prog),
|
||||
'description': description,
|
||||
'uploader': station_name,
|
||||
'uploader_id': station,
|
||||
|
@ -1,6 +1,7 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
@ -91,7 +92,7 @@ class RaiBaseIE(InfoExtractor):
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
|
||||
|
||||
if not audio_only and not is_live:
|
||||
formats.extend(self._create_http_urls(media_url, relinker_url, formats))
|
||||
formats.extend(self._create_http_urls(media_url, relinker_url, formats, video_id))
|
||||
|
||||
return filter_dict({
|
||||
'is_live': is_live,
|
||||
@ -99,7 +100,7 @@ class RaiBaseIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
def _create_http_urls(self, manifest_url, relinker_url, fmts):
|
||||
def _create_http_urls(self, manifest_url, relinker_url, fmts, video_id):
|
||||
_MANIFEST_REG = r'/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4)?(?:\.csmil)?/playlist\.m3u8'
|
||||
_MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'
|
||||
_QUALITY = {
|
||||
@ -166,6 +167,14 @@ class RaiBaseIE(InfoExtractor):
|
||||
'fps': 25,
|
||||
}
|
||||
|
||||
# Check if MP4 download is available
|
||||
try:
|
||||
self._request_webpage(
|
||||
HEADRequest(_MP4_TMPL % (relinker_url, '*')), video_id, 'Checking MP4 availability')
|
||||
except ExtractorError as e:
|
||||
self.to_screen(f'{video_id}: MP4 direct download is not available: {e.cause}')
|
||||
return []
|
||||
|
||||
# filter out single-stream formats
|
||||
fmts = [f for f in fmts
|
||||
if not f.get('vcodec') == 'none' and not f.get('acodec') == 'none']
|
||||
|
135
yt_dlp/extractor/redge.py
Normal file
135
yt_dlp/extractor/redge.py
Normal file
@ -0,0 +1,135 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_qs,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class RedCDNLivxIE(InfoExtractor):
    # Extractor for RedCDN ".livx" stream URLs (redcdn.pl / atmcdn.pl hosts)
    _VALID_URL = r'https?://[^.]+\.(?:dcs\.redcdn|atmcdn)\.pl/(?:live(?:dash|hls|ss)|nvr)/o2/(?P<tenant>[^/?#]+)/(?P<id>[^?#]+)\.livx'
    IE_NAME = 'redcdnlivx'

    _TESTS = [{
        'url': 'https://r.dcs.redcdn.pl/livedash/o2/senat/ENC02/channel.livx?indexMode=true&startTime=638272860000&stopTime=638292544000',
        'info_dict': {
            'id': 'ENC02-638272860000-638292544000',
            'ext': 'mp4',
            'title': 'ENC02',
            'duration': 19683.982,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://r.dcs.redcdn.pl/livedash/o2/sejm/ENC18/live.livx?indexMode=true&startTime=722333096000&stopTime=722335562000',
        'info_dict': {
            'id': 'ENC18-722333096000-722335562000',
            'ext': 'mp4',
            'title': 'ENC18',
            'duration': 2463.995,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://r.dcs.redcdn.pl/livehls/o2/sportevolution/live/triathlon2018/warsaw.livx/playlist.m3u8?startTime=550305000000&stopTime=550327620000',
        'info_dict': {
            'id': 'triathlon2018-warsaw-550305000000-550327620000',
            'ext': 'mp4',
            'title': 'triathlon2018/warsaw',
            'duration': 22619.98,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://n-25-12.dcs.redcdn.pl/nvr/o2/sejm/Migacz-ENC01/1.livx?startTime=722347200000&stopTime=722367345000',
        'only_matching': True,
    }, {
        'url': 'https://redir.atmcdn.pl/nvr/o2/sejm/ENC08/1.livx?startTime=503831270000&stopTime=503840040000',
        'only_matching': True,
    }]

    """
    Known methods (first in url path):
    - `livedash` - DASH MPD
    - `livehls` - HTTP Live Streaming
    - `livess` - IIS Smooth Streaming
    - `nvr` - CCTV mode, directly returns a file, typically flv, avc1, aac
    - `sc` - shoutcast/icecast (audio streams, like radio)
    """

    def _real_extract(self, url):
        """Collect formats for one .livx stream across the ISM, NVR, DASH and HLS methods.

        Id and title are derived from the URL path and the startTime/stopTime
        query parameters; duration and live status come from the ISM manifest
        when it could be downloaded.
        """
        tenant, path = self._match_valid_url(url).group('tenant', 'id')
        url_query = parse_qs(url)
        start_time = traverse_obj(url_query, ('startTime', 0, {int_or_none}))
        stop_time = traverse_obj(url_query, ('stopTime', 0, {int_or_none}))

        def livx_mode(mode):
            # Build the URL of this stream (same time window) for the given delivery method
            suffix = {'livess': '/manifest', 'livehls': '/playlist.m3u8'}.get(mode, '')
            file_qs = {
                name: value
                for name, value in (('startTime', start_time), ('stopTime', stop_time))
                if value
            }
            if mode == 'nvr':
                file_qs['nolimit'] = 1
            elif mode != 'sc':
                file_qs['indexMode'] = 'true'
            return update_url_query(f'https://r.dcs.redcdn.pl/{mode}/o2/{tenant}/{path}.livx{suffix}', file_qs)

        # no id or title for a transmission. making ones up.
        title = path
        for generic_part in ('/live', 'live/', '/channel', 'channel/'):
            title = title.replace(generic_part, '')
        title = title.strip('/')
        video_id = join_nonempty(title.replace('/', '-'), start_time, stop_time)

        formats = []
        # downloading the manifest separately here instead of _extract_ism_formats to also get some stream metadata
        ism_res = self._download_xml_handle(
            livx_mode('livess'), video_id,
            note='Downloading ISM manifest',
            errnote='Failed to download ISM manifest',
            fatal=False)
        if ism_res is False:
            ism_doc = None
        else:
            ism_doc, ism_urlh = ism_res
            formats, _ = self._parse_ism_formats_and_subtitles(ism_doc, ism_urlh.url, 'ss')

        # Any HTTP status is accepted for the HEAD probe; only a 200 adds the direct file
        nvr_urlh = self._request_webpage(
            HEADRequest(livx_mode('nvr')), video_id, 'Follow flv file redirect', fatal=False,
            expected_status=lambda _: True)
        if nvr_urlh and nvr_urlh.status == 200:
            direct_format = {
                'url': nvr_urlh.url,
                'ext': 'flv',
                'format_id': 'direct-0',
                'preference': -1,  # might be slow
            }
            formats.append(direct_format)

        dash_formats = self._extract_mpd_formats(
            livx_mode('livedash'), video_id, mpd_id='dash', fatal=False)
        formats.extend(dash_formats)
        hls_formats = self._extract_m3u8_formats(
            livx_mode('livehls'), video_id, m3u8_id='hls', ext='mp4', fatal=False)
        formats.extend(hls_formats)

        # @Duration is divided by @TimeScale; 10000000 is used when the manifest gives none
        time_scale = traverse_obj(ism_doc, ('@TimeScale', {int_or_none})) or 10000000
        to_seconds = functools.partial(float_or_none, scale=time_scale)
        duration = traverse_obj(ism_doc, ('@Duration', {to_seconds})) or None

        if traverse_obj(ism_doc, '@IsLive') == 'TRUE':
            live_status = 'is_live'
        else:
            live_status = 'was_live' if duration else None

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'duration': duration,
            'live_status': live_status,
        }
|
@ -7,11 +7,12 @@ from ..utils import (
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class RedTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com(?:\.br)?/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
|
||||
_EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.redtube.com/38864951',
|
||||
@ -34,6 +35,9 @@ class RedTubeIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://it.redtube.com/66418',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.redtube.com.br/103224331',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -79,7 +83,7 @@ class RedTubeIE(InfoExtractor):
|
||||
'media definitions', default='{}'),
|
||||
video_id, fatal=False)
|
||||
for media in medias if isinstance(medias, list) else []:
|
||||
format_url = url_or_none(media.get('videoUrl'))
|
||||
format_url = urljoin('https://www.redtube.com', media.get('videoUrl'))
|
||||
if not format_url:
|
||||
continue
|
||||
format_id = media.get('format')
|
||||
|
@ -1,8 +1,34 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import format_field, parse_iso8601
|
||||
from ..utils import (
|
||||
MEDIA_EXTENSIONS,
|
||||
determine_ext,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class RinseFMIE(InfoExtractor):
|
||||
class RinseFMBaseIE(InfoExtractor):
|
||||
@staticmethod
|
||||
def _parse_entry(entry):
|
||||
return {
|
||||
**traverse_obj(entry, {
|
||||
'id': ('id', {str}),
|
||||
'title': ('title', {str}),
|
||||
'url': ('fileUrl', {url_or_none}),
|
||||
'release_timestamp': ('episodeDate', {parse_iso8601}),
|
||||
'thumbnail': ('featuredImage', 0, 'filename', {str},
|
||||
{lambda x: x and f'https://rinse.imgix.net/media/{x}'}),
|
||||
'webpage_url': ('slug', {str},
|
||||
{lambda x: x and f'https://rinse.fm/episodes/{x}'}),
|
||||
}),
|
||||
'vcodec': 'none',
|
||||
'extractor_key': RinseFMIE.ie_key(),
|
||||
'extractor': RinseFMIE.IE_NAME,
|
||||
}
|
||||
|
||||
|
||||
class RinseFMIE(RinseFMBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?rinse\.fm/episodes/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://rinse.fm/episodes/club-glow-15-12-2023-2000/',
|
||||
@ -22,12 +48,42 @@ class RinseFMIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
entry = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['entry']
|
||||
|
||||
return {
|
||||
'id': entry['id'],
|
||||
'title': entry.get('title'),
|
||||
'url': entry['fileUrl'],
|
||||
'vcodec': 'none',
|
||||
'release_timestamp': parse_iso8601(entry.get('episodeDate')),
|
||||
'thumbnail': format_field(
|
||||
entry, [('featuredImage', 0, 'filename')], 'https://rinse.imgix.net/media/%s', default=None),
|
||||
}
|
||||
return self._parse_entry(entry)
|
||||
|
||||
|
||||
class RinseFMArtistPlaylistIE(RinseFMBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?rinse\.fm/shows/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://rinse.fm/shows/resources/',
|
||||
'info_dict': {
|
||||
'id': 'resources',
|
||||
'title': '[re]sources',
|
||||
'description': '[re]sources est un label parisien piloté par le DJ et producteur Tommy Kid.'
|
||||
},
|
||||
'playlist_mincount': 40
|
||||
}, {
|
||||
'url': 'https://rinse.fm/shows/ivy/',
|
||||
'info_dict': {
|
||||
'id': 'ivy',
|
||||
'title': '[IVY]',
|
||||
'description': 'A dedicated space for DNB/Turbo House and 4x4.'
|
||||
},
|
||||
'playlist_mincount': 7
|
||||
}]
|
||||
|
||||
def _entries(self, data):
|
||||
for episode in traverse_obj(data, (
|
||||
'props', 'pageProps', 'episodes', lambda _, v: determine_ext(v['fileUrl']) in MEDIA_EXTENSIONS.audio)
|
||||
):
|
||||
yield self._parse_entry(episode)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
title = self._og_search_title(webpage) or self._html_search_meta('title', webpage)
|
||||
description = self._og_search_description(webpage) or self._html_search_meta(
|
||||
'description', webpage)
|
||||
data = self._search_nextjs_data(webpage, playlist_id)
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(data), playlist_id, title, description=description)
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user