From 3f66b6fe50f8d5b545712f8b19d5ae62f5373980 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sat, 27 May 2023 19:17:27 +1200 Subject: [PATCH] [core] Workaround erroneous urllib Windows proxy parsing (#7092) Convert proxies extracted from windows registry to http for older Python versions. See: https://github.com/python/cpython/issues/86793 Authored by: coletdjnz --- Makefile | 2 +- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/compat/urllib/__init__.py | 7 ++++++ yt_dlp/compat/urllib/request.py | 40 ++++++++++++++++++++++++++++++++ 4 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 yt_dlp/compat/urllib/__init__.py create mode 100644 yt_dlp/compat/urllib/request.py diff --git a/Makefile b/Makefile index f03fe2052..b1ac0e7d6 100644 --- a/Makefile +++ b/Makefile @@ -74,7 +74,7 @@ offlinetest: codetest $(PYTHON) -m pytest -k "not download" # XXX: This is hard to maintain -CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat yt_dlp/utils yt_dlp/dependencies +CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat yt_dlp/compat/urllib yt_dlp/utils yt_dlp/dependencies yt-dlp: yt_dlp/*.py yt_dlp/*/*.py mkdir -p zip for d in $(CODE_FOLDERS) ; do \ diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index f69bc98c5..f49dbf07d 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -21,9 +21,9 @@ import time import tokenize import traceback import unicodedata -import urllib.request from .cache import Cache +from .compat import urllib # isort: split from .compat import compat_os_name, compat_shlex_quote from .cookies import load_cookies from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name diff --git a/yt_dlp/compat/urllib/__init__.py b/yt_dlp/compat/urllib/__init__.py new file mode 100644 index 000000000..6b6b8e103 --- /dev/null +++ b/yt_dlp/compat/urllib/__init__.py @@ -0,0 +1,7 @@ +# flake8: noqa: F405 +from urllib import * # noqa: F403 + +from ..compat_utils import passthrough_module + +passthrough_module(__name__, 'urllib') +del passthrough_module diff --git a/yt_dlp/compat/urllib/request.py b/yt_dlp/compat/urllib/request.py new file mode 100644 index 000000000..ff63b2f0e --- /dev/null +++ b/yt_dlp/compat/urllib/request.py @@ -0,0 +1,40 @@ +# flake8: noqa: F405 +from urllib.request import * # noqa: F403 + +from ..compat_utils import passthrough_module + +passthrough_module(__name__, 'urllib.request') +del passthrough_module + + +from .. import compat_os_name + +if compat_os_name == 'nt': + # On older python versions, proxies are extracted from Windows registry erroneously. [1] + # If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2] + # It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade + # it to http on these older python versions to avoid issues + # This also applies for ftp proxy type, as ftp:// proxy scheme is not supported. + # 1: https://github.com/python/cpython/issues/86793 + # 2: https://github.com/python/cpython/blob/51f1ae5ceb0673316c4e4b0175384e892e33cc6e/Lib/urllib/request.py#L2683-L2698 + import sys + from urllib.request import getproxies_environment, getproxies_registry + + def getproxies_registry_patched(): + proxies = getproxies_registry() + if ( + sys.version_info >= (3, 10, 5) # https://docs.python.org/3.10/whatsnew/changelog.html#python-3-10-5-final + or (3, 9, 13) <= sys.version_info < (3, 10) # https://docs.python.org/3.9/whatsnew/changelog.html#python-3-9-13-final + ): + return proxies + + for scheme in ('https', 'ftp'): + if scheme in proxies and proxies[scheme].startswith(f'{scheme}://'): + proxies[scheme] = 'http' + proxies[scheme][len(scheme):] + + return proxies + + def getproxies(): + return getproxies_environment() or getproxies_registry_patched() + +del compat_os_name