Compare commits

..

3 Commits

Author SHA1 Message Date
coletdev
379a4f161d
[utils] Fix inconsistent default handling between HTTP and HTTPS requests (#4158)
Default request handling, such as adding default headers like `Content-Type`, was only being applied to HTTPS requests and not to plain HTTP requests.

Fixes bug in be4a824d74

Authored-by: coletdjnz
2022-06-24 03:29:28 +00:00
Brett824
06cc8f103b
[extractor/youtube] Mark videos as fully watched (#4146)
* Also fixes videos appearing as shorts in watch history

Closes #2555
Authored by: Brett824
2022-06-23 16:30:17 -07:00
Jelle Besseling
34baaced11
[extractor/dropout] Support cookies and login only as needed (#4075)
Closes #4035
Authored by: pingiun, pukkandan
2022-06-23 16:21:03 -07:00
3 changed files with 44 additions and 29 deletions

View File

@ -119,16 +119,16 @@ class DropoutIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
login_err, webpage = False, ''
try: webpage = None
if self._get_cookies('https://www.dropout.tv').get('_session'):
webpage = self._download_webpage(url, display_id)
if not webpage or '<div id="watch-unauthorized"' in webpage:
login_err = self._login(display_id) login_err = self._login(display_id)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
finally: if login_err and '<div id="watch-unauthorized"' in webpage:
if not login_err:
self._download_webpage('https://www.dropout.tv/logout', display_id, note='Logging out', fatal=False)
elif '<div id="watch-unauthorized"' in webpage:
if login_err is True: if login_err is True:
self.raise_login_required(method='password') self.raise_login_required(method='any')
raise ExtractorError(login_err, expected=True) raise ExtractorError(login_err, expected=True)
embed_url = self._search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url') embed_url = self._search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url')

View File

@ -2643,30 +2643,45 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return sts return sts
def _mark_watched(self, video_id, player_responses): def _mark_watched(self, video_id, player_responses):
playback_url = get_first( for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'), label = 'fully ' if is_full else ''
expected_type=url_or_none) url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
if not playback_url: expected_type=url_or_none)
self.report_warning('Unable to mark watched') if not url:
return self.report_warning(f'Unable to mark {label}watched')
parsed_playback_url = compat_urlparse.urlparse(playback_url) return
qs = compat_urlparse.parse_qs(parsed_playback_url.query) parsed_url = compat_urlparse.urlparse(url)
qs = compat_urlparse.parse_qs(parsed_url.query)
# cpn generation algorithm is reverse engineered from base.js. # cpn generation algorithm is reverse engineered from base.js.
# In fact it works even with dummy cpn. # In fact it works even with dummy cpn.
CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_' CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)) cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))
qs.update({ # # more consistent results setting it to right before the end
'ver': ['2'], video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]
'cpn': [cpn],
})
playback_url = compat_urlparse.urlunparse(
parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
self._download_webpage( qs.update({
playback_url, video_id, 'Marking watched', 'ver': ['2'],
'Unable to mark watched', fatal=False) 'cpn': [cpn],
'cmt': video_length,
'el': 'detailpage', # otherwise defaults to "shorts"
})
if is_full:
# these seem to mark watchtime "history" in the real world
# they're required, so send in a single value
qs.update({
'st': video_length,
'et': video_length,
})
url = compat_urlparse.urlunparse(
parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
self._download_webpage(
url, video_id, f'Marking {label}watched',
'Unable to mark watched', fatal=False)
@staticmethod @staticmethod
def _extract_urls(webpage): def _extract_urls(webpage):

View File

@ -1343,7 +1343,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
req.headers = handle_youtubedl_headers(req.headers) req.headers = handle_youtubedl_headers(req.headers)
return req return super().do_request_(req)
def http_response(self, req, resp): def http_response(self, req, resp):
old_resp = resp old_resp = resp