Compare commits

...

3 Commits

Author SHA1 Message Date
pukkandan
c043c24625
[extractor] Fix _create_request when headers is None
Closes #4164
2022-06-25 19:41:22 +05:30
FestplattenSchnitzel
74900105be
[extractor/ViMP] Add thumbnail and support more sites (#4147)
Authored by: FestplattenSchnitzel
2022-06-25 19:06:24 +05:30
HobbyistDev
d1bf2e199c
[extractor/fuyin] Add extractor (#4151)
Closes #2871

Authored by: HobbyistDev
2022-06-25 06:14:58 -07:00
5 changed files with 60 additions and 12 deletions

View File

@ -563,6 +563,7 @@ from .funimation import (
) )
from .funk import FunkIE from .funk import FunkIE
from .fusion import FusionIE from .fusion import FusionIE
from .fuyintv import FuyinTVIE
from .gab import ( from .gab import (
GabTVIE, GabTVIE,
GabIE, GabIE,

View File

@ -730,14 +730,14 @@ class InfoExtractor:
else: else:
return err.code in variadic(expected_status) return err.code in variadic(expected_status)
def _create_request(self, url_or_request, data=None, headers={}, query={}): def _create_request(self, url_or_request, data=None, headers=None, query=None):
if isinstance(url_or_request, urllib.request.Request): if isinstance(url_or_request, urllib.request.Request):
return update_Request(url_or_request, data=data, headers=headers, query=query) return update_Request(url_or_request, data=data, headers=headers, query=query)
if query: if query:
url_or_request = update_url_query(url_or_request, query) url_or_request = update_url_query(url_or_request, query)
return sanitized_Request(url_or_request, data, headers) return sanitized_Request(url_or_request, data, headers or {})
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None): def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
""" """
Return the response handle. Return the response handle.
@ -765,8 +765,8 @@ class InfoExtractor:
# geo unrestricted country. We will do so once we encounter any # geo unrestricted country. We will do so once we encounter any
# geo restriction error. # geo restriction error.
if self._x_forwarded_for_ip: if self._x_forwarded_for_ip:
if 'X-Forwarded-For' not in headers: headers = (headers or {}).copy()
headers['X-Forwarded-For'] = self._x_forwarded_for_ip headers.setdefault('X-Forwarded-For', self._x_forwarded_for_ip)
try: try:
return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query)) return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query))

View File

@ -0,0 +1,30 @@
from .common import InfoExtractor
from ..utils import traverse_obj
class FuyinTVIE(InfoExtractor):
    """Extractor for individual episode pages on fuyin.tv."""

    _VALID_URL = r'https?://(?:www\.)?fuyin\.tv/html/(?:\d+)/(?P<id>\d+)\.html'
    _TESTS = [{
        'url': 'https://www.fuyin.tv/html/2733/44129.html',
        'info_dict': {
            'id': '44129',
            'ext': 'mp4',
            'title': '第1集',
            'description': 'md5:21a3d238dc8d49608e1308e85044b9c3',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the episode addressed by *url*.

        The media URL and title are read from the site's JSON API; the
        description is read from the HTML page's ``description`` meta tag
        when the page can be fetched.
        """
        video_id = self._match_id(url)
        json_data = self._download_json(
            'https://www.fuyin.tv/api/api/tv.movie/url',
            video_id, query={'urlid': video_id})

        # The HTML page only supplies the optional description, so a failed
        # download must not abort extraction (hence fatal=False).
        webpage = self._download_webpage(url, video_id, fatal=False)

        # With fatal=False the download may hand back a falsy value instead
        # of page text; searching that for a meta tag would raise, so skip
        # the lookup and leave the description unset in that case.
        description = (
            self._html_search_meta('description', webpage)
            if webpage else None)

        return {
            'id': video_id,
            'title': traverse_obj(json_data, ('data', 'title')),
            'url': json_data['data']['url'],
            'ext': 'mp4',
            'description': description,
        }

View File

@ -6,14 +6,18 @@ from ..utils import ExtractorError
class VideocampusSachsenIE(InfoExtractor): class VideocampusSachsenIE(InfoExtractor):
IE_NAME = 'Vimp' IE_NAME = 'ViMP'
_INSTANCES = ( _INSTANCES = (
'bergauf.tv',
'campus.demo.vimp.com', 'campus.demo.vimp.com',
'corporate.demo.vimp.com', 'corporate.demo.vimp.com',
'dancehalldatabase.com', 'dancehalldatabase.com',
'drehzahl.tv',
'educhannel.hs-gesundheit.de', 'educhannel.hs-gesundheit.de',
'emedia.ls.haw-hamburg.de', 'emedia.ls.haw-hamburg.de',
'globale-evolution.net', 'globale-evolution.net',
'hohu.tv',
'htvideos.hightechhigh.org',
'k210039.vimp.mivitec.net', 'k210039.vimp.mivitec.net',
'media.cmslegal.com', 'media.cmslegal.com',
'media.hs-furtwangen.de', 'media.hs-furtwangen.de',
@ -25,6 +29,7 @@ class VideocampusSachsenIE(InfoExtractor):
'mportal.europa-uni.de', 'mportal.europa-uni.de',
'pacific.demo.vimp.com', 'pacific.demo.vimp.com',
'slctv.com', 'slctv.com',
'streaming.prairiesouth.ca',
'tube.isbonline.cn', 'tube.isbonline.cn',
'univideo.uni-kassel.de', 'univideo.uni-kassel.de',
'ursula2.genetics.emory.edu', 'ursula2.genetics.emory.edu',
@ -52,11 +57,15 @@ class VideocampusSachsenIE(InfoExtractor):
'vimp.weka-fachmedien.de', 'vimp.weka-fachmedien.de',
'webtv.univ-montp3.fr', 'webtv.univ-montp3.fr',
'www.b-tu.de/media', 'www.b-tu.de/media',
'www.bergauf.tv',
'www.bigcitytv.de', 'www.bigcitytv.de',
'www.cad-videos.de', 'www.cad-videos.de',
'www.drehzahl.tv',
'www.fh-bielefeld.de/medienportal', 'www.fh-bielefeld.de/medienportal',
'www.hohu.tv',
'www.orvovideo.com', 'www.orvovideo.com',
'www.rwe.tv', 'www.rwe.tv',
'www.salzi.tv',
'www.wenglor-media.com', 'www.wenglor-media.com',
'www2.univ-sba.dz', 'www2.univ-sba.dz',
) )
@ -73,6 +82,7 @@ class VideocampusSachsenIE(InfoExtractor):
'id': 'e6b9349905c1628631f175712250f2a1', 'id': 'e6b9349905c1628631f175712250f2a1',
'title': 'Konstruktiver Entwicklungsprozess Vorlesung 7', 'title': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
'description': 'Konstruktiver Entwicklungsprozess Vorlesung 7', 'description': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
'thumbnail': 'https://videocampus.sachsen.de/cache/1a985379ad3aecba8097a6902c7daa4e.jpg',
'ext': 'mp4', 'ext': 'mp4',
}, },
}, },
@ -82,6 +92,7 @@ class VideocampusSachsenIE(InfoExtractor):
'id': 'fc99c527e4205b121cb7c74433469262', 'id': 'fc99c527e4205b121cb7c74433469262',
'title': 'Was ist selbstgesteuertes Lernen?', 'title': 'Was ist selbstgesteuertes Lernen?',
'description': 'md5:196aa3b0509a526db62f84679522a2f5', 'description': 'md5:196aa3b0509a526db62f84679522a2f5',
'thumbnail': 'https://videocampus.sachsen.de/cache/6f4a85096ba24cb398e6ce54446b57ae.jpg',
'display_id': 'Was-ist-selbstgesteuertes-Lernen', 'display_id': 'Was-ist-selbstgesteuertes-Lernen',
'ext': 'mp4', 'ext': 'mp4',
}, },
@ -92,6 +103,7 @@ class VideocampusSachsenIE(InfoExtractor):
'id': '09d4ed029002eb1bdda610f1103dd54c', 'id': '09d4ed029002eb1bdda610f1103dd54c',
'title': 'Tutorial zur Nutzung von Adobe Connect aus Veranstalter-Sicht', 'title': 'Tutorial zur Nutzung von Adobe Connect aus Veranstalter-Sicht',
'description': 'md5:3d379ca3cc17b9da6784d7f58cca4d58', 'description': 'md5:3d379ca3cc17b9da6784d7f58cca4d58',
'thumbnail': 'https://videocampus.sachsen.de/cache/2452498fe8c2d5a7dc79a05d30f407b6.jpg',
'display_id': 'Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht', 'display_id': 'Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht',
'ext': 'mp4', 'ext': 'mp4',
}, },
@ -103,6 +115,7 @@ class VideocampusSachsenIE(InfoExtractor):
'id': '0183356e41af7bfb83d7667b20d9b6a3', 'id': '0183356e41af7bfb83d7667b20d9b6a3',
'title': 'Présentation de la Faculté de droit et des sciences politiques - Journée portes ouvertes 2021/22', 'title': 'Présentation de la Faculté de droit et des sciences politiques - Journée portes ouvertes 2021/22',
'description': 'md5:508958bd93e0ca002ac731d94182a54f', 'description': 'md5:508958bd93e0ca002ac731d94182a54f',
'thumbnail': 'https://www2.univ-sba.dz/cache/4d5d4a0b4189271a8cc6cb5328e14769.jpg',
'display_id': 'Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122', 'display_id': 'Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122',
'ext': 'mp4', 'ext': 'mp4',
} }
@ -113,6 +126,7 @@ class VideocampusSachsenIE(InfoExtractor):
'id': 'c8816f1cc942c12b6cce57c835cffd7c', 'id': 'c8816f1cc942c12b6cce57c835cffd7c',
'title': 'Preisverleihung »Produkte des Jahres 2022«', 'title': 'Preisverleihung »Produkte des Jahres 2022«',
'description': 'md5:60c347568ca89aa25b772c4ea564ebd3', 'description': 'md5:60c347568ca89aa25b772c4ea564ebd3',
'thumbnail': 'https://vimp.weka-fachmedien.de/cache/da9f3090e9227b25beacf67ccf94de14.png',
'display_id': 'Preisverleihung-Produkte-des-Jahres-2022', 'display_id': 'Preisverleihung-Produkte-des-Jahres-2022',
'ext': 'mp4', 'ext': 'mp4',
}, },
@ -124,7 +138,7 @@ class VideocampusSachsenIE(InfoExtractor):
'title': 'Was ist selbstgesteuertes Lernen?', 'title': 'Was ist selbstgesteuertes Lernen?',
'ext': 'mp4', 'ext': 'mp4',
}, },
} },
] ]
def _real_extract(self, url): def _real_extract(self, url):
@ -139,12 +153,14 @@ class VideocampusSachsenIE(InfoExtractor):
if not (display_id or tmp_id): if not (display_id or tmp_id):
# Title, description from embedded page's meta wouldn't be correct # Title, description from embedded page's meta wouldn't be correct
title = self._html_search_regex(r'<img[^>]* title="([^"<]+)"', webpage, 'title', fatal=False) title = self._html_search_regex(r'<video-js[^>]* data-piwik-title="([^"<]+)"', webpage, 'title', fatal=False)
description = None description = None
thumbnail = None
else: else:
title = self._html_search_meta(('og:title', 'twitter:title', 'title'), webpage, fatal=False) title = self._html_search_meta(('og:title', 'twitter:title', 'title'), webpage, fatal=False)
description = self._html_search_meta( description = self._html_search_meta(
('og:description', 'twitter:description', 'description'), webpage, default=None) ('og:description', 'twitter:description', 'description'), webpage, fatal=False)
thumbnail = self._html_search_meta(('og:image', 'twitter:image'), webpage, fatal=False)
formats, subtitles = [], {} formats, subtitles = [], {}
try: try:
@ -162,7 +178,8 @@ class VideocampusSachsenIE(InfoExtractor):
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': description,
'thumbnail': thumbnail,
'display_id': display_id, 'display_id': display_id,
'formats': formats, 'formats': formats,
'subtitles': subtitles 'subtitles': subtitles,
} }

View File

@ -3015,9 +3015,9 @@ def update_url_query(url, query):
query=urllib.parse.urlencode(qs, True))) query=urllib.parse.urlencode(qs, True)))
def update_Request(req, url=None, data=None, headers={}, query={}): def update_Request(req, url=None, data=None, headers=None, query=None):
req_headers = req.headers.copy() req_headers = req.headers.copy()
req_headers.update(headers) req_headers.update(headers or {})
req_data = data or req.data req_data = data or req.data
req_url = update_url_query(url or req.get_full_url(), query) req_url = update_url_query(url or req.get_full_url(), query)
req_get_method = req.get_method() req_get_method = req.get_method()