mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-14 21:23:05 +00:00
Compare commits
9 Commits
079a7cfc71
...
ab6df717d1
Author | SHA1 | Date | |
---|---|---|---|
|
ab6df717d1 | ||
|
0c8d9e5fec | ||
|
3f047fc406 | ||
|
82b5176783 | ||
|
17b183886f | ||
|
cd170e8184 | ||
|
297e9952b6 | ||
|
dca4f46274 | ||
|
5dee3ad037 |
@ -3,7 +3,6 @@
|
||||
- **17live:clip**
|
||||
- **1tv**: Первый канал
|
||||
- **20min**
|
||||
- **220.ro**
|
||||
- **23video**
|
||||
- **247sports**
|
||||
- **24video**
|
||||
@ -11,7 +10,6 @@
|
||||
- **3sat**
|
||||
- **4tube**
|
||||
- **56.com**
|
||||
- **5min**
|
||||
- **6play**
|
||||
- **7plus**
|
||||
- **8tracks**
|
||||
@ -381,7 +379,6 @@
|
||||
- **FranceTVSite**
|
||||
- **Freesound**
|
||||
- **freespeech.org**
|
||||
- **FreshLive**
|
||||
- **FrontendMasters**
|
||||
- **FrontendMastersCourse**
|
||||
- **FrontendMastersLesson**
|
||||
@ -454,7 +451,6 @@
|
||||
- **hitbox:live**
|
||||
- **HitRecord**
|
||||
- **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau
|
||||
- **HornBunny**
|
||||
- **HotNewHipHop**
|
||||
- **hotstar**
|
||||
- **hotstar:playlist**
|
||||
@ -499,7 +495,6 @@
|
||||
- **iq.com**: International version of iQiyi
|
||||
- **iq.com:album**
|
||||
- **iqiyi**: 爱奇艺
|
||||
- **Ir90Tv**
|
||||
- **ITTF**
|
||||
- **ITV**
|
||||
- **ITVBTCC**
|
||||
@ -516,7 +511,6 @@
|
||||
- **JWPlatform**
|
||||
- **Kakao**
|
||||
- **Kaltura**
|
||||
- **Kankan**
|
||||
- **Karaoketv**
|
||||
- **KarriereVideos**
|
||||
- **Katsomo**
|
||||
@ -989,7 +983,6 @@
|
||||
- **RoosterTeeth**
|
||||
- **RoosterTeethSeries**
|
||||
- **RottenTomatoes**
|
||||
- **Roxwel**
|
||||
- **Rozhlas**
|
||||
- **RTBF**
|
||||
- **RTDocumentry**
|
||||
@ -1181,7 +1174,6 @@
|
||||
- **TheIntercept**
|
||||
- **ThePlatform**
|
||||
- **ThePlatformFeed**
|
||||
- **TheScene**
|
||||
- **TheStar**
|
||||
- **TheSun**
|
||||
- **ThetaStream**
|
||||
@ -1388,7 +1380,6 @@
|
||||
- **VShare**
|
||||
- **VTM**
|
||||
- **VTXTV**
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
- **Vupload**
|
||||
- **VVVVID**
|
||||
|
@ -3860,7 +3860,7 @@ class YoutubeDL(object):
|
||||
else:
|
||||
self.to_screen(f'[info] Downloading {thumb_display_id} ...')
|
||||
try:
|
||||
uf = self.urlopen(t['url'])
|
||||
uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
|
||||
self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
|
||||
with open(encodeFilename(thumb_filename), 'wb') as thumbf:
|
||||
shutil.copyfileobj(uf, thumbf)
|
||||
|
@ -416,26 +416,35 @@ class AfreecaTVLiveIE(AfreecaTVIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
broadcaster_id, broadcast_no = self._match_valid_url(url).group('id', 'bno')
|
||||
password = self.get_param('videopassword')
|
||||
|
||||
info = self._download_json(self._LIVE_API_URL, broadcaster_id, fatal=False,
|
||||
data=urlencode_postdata({'bid': broadcaster_id})) or {}
|
||||
channel_info = info.get('CHANNEL') or {}
|
||||
broadcaster_id = channel_info.get('BJID') or broadcaster_id
|
||||
broadcast_no = channel_info.get('BNO') or broadcast_no
|
||||
password_protected = channel_info.get('BPWD')
|
||||
if not broadcast_no:
|
||||
raise ExtractorError(f'Unable to extract broadcast number ({broadcaster_id} may not be live)', expected=True)
|
||||
if password_protected == 'Y' and password is None:
|
||||
raise ExtractorError(
|
||||
'This livestream is protected by a password, use the --video-password option',
|
||||
expected=True)
|
||||
|
||||
formats = []
|
||||
quality_key = qualities(self._QUALITIES)
|
||||
for quality_str in self._QUALITIES:
|
||||
aid_response = self._download_json(
|
||||
self._LIVE_API_URL, broadcast_no, fatal=False,
|
||||
data=urlencode_postdata({
|
||||
params = {
|
||||
'bno': broadcast_no,
|
||||
'stream_type': 'common',
|
||||
'type': 'aid',
|
||||
'quality': quality_str,
|
||||
}),
|
||||
}
|
||||
if password is not None:
|
||||
params['pwd'] = password
|
||||
aid_response = self._download_json(
|
||||
self._LIVE_API_URL, broadcast_no, fatal=False,
|
||||
data=urlencode_postdata(params),
|
||||
note=f'Downloading access token for {quality_str} stream',
|
||||
errnote=f'Unable to download access token for {quality_str} stream')
|
||||
aid = traverse_obj(aid_response, ('CHANNEL', 'AID'))
|
||||
|
@ -1,32 +1,45 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
)
|
||||
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class BeegIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?beeg\.(?:com(?:/video)?)/-?(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# api/v6 v1
|
||||
'url': 'http://beeg.com/5416503',
|
||||
'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',
|
||||
'url': 'https://beeg.com/-0983946056129650',
|
||||
'md5': '51d235147c4627cfce884f844293ff88',
|
||||
'info_dict': {
|
||||
'id': '5416503',
|
||||
'id': '0983946056129650',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sultry Striptease',
|
||||
'description': 'md5:d22219c09da287c14bed3d6c37ce4bc2',
|
||||
'timestamp': 1391813355,
|
||||
'upload_date': '20140207',
|
||||
'duration': 383,
|
||||
'title': 'sucked cock and fucked in a private plane',
|
||||
'duration': 927,
|
||||
'tags': list,
|
||||
'age_limit': 18,
|
||||
'upload_date': '20220131',
|
||||
'timestamp': 1643656455,
|
||||
'display_id': 2540839,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://beeg.com/-0599050563103750?t=4-861',
|
||||
'md5': 'bd8b5ea75134f7f07fad63008db2060e',
|
||||
'info_dict': {
|
||||
'id': '0599050563103750',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bad Relatives',
|
||||
'duration': 2060,
|
||||
'tags': list,
|
||||
'age_limit': 18,
|
||||
'description': 'md5:b4fc879a58ae6c604f8f259155b7e3b9',
|
||||
'timestamp': 1643623200,
|
||||
'display_id': 2569965,
|
||||
'upload_date': '20220131',
|
||||
}
|
||||
}, {
|
||||
# api/v6 v2
|
||||
@ -36,12 +49,6 @@ class BeegIE(InfoExtractor):
|
||||
# api/v6 v2 w/o t
|
||||
'url': 'https://beeg.com/1277207756',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://beeg.porn/video/5416503',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://beeg.porn/5416503',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -49,68 +56,38 @@ class BeegIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
beeg_version = self._search_regex(
|
||||
r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',
|
||||
default='1546225636701')
|
||||
|
||||
if len(video_id) >= 10:
|
||||
query = {
|
||||
'v': 2,
|
||||
}
|
||||
qs = parse_qs(url)
|
||||
t = qs.get('t', [''])[0].split('-')
|
||||
if len(t) > 1:
|
||||
query.update({
|
||||
's': t[0],
|
||||
'e': t[1],
|
||||
})
|
||||
else:
|
||||
query = {'v': 1}
|
||||
|
||||
for api_path in ('', 'api.'):
|
||||
video = self._download_json(
|
||||
'https://%sbeeg.com/api/v6/%s/video/%s'
|
||||
% (api_path, beeg_version, video_id), video_id,
|
||||
fatal=api_path == 'api.', query=query)
|
||||
if video:
|
||||
break
|
||||
'https://store.externulls.com/facts/file/%s' % video_id,
|
||||
video_id, 'Downloading JSON for %s' % video_id)
|
||||
|
||||
fc_facts = video.get('fc_facts')
|
||||
first_fact = {}
|
||||
for fact in fc_facts:
|
||||
if not first_fact or try_get(fact, lambda x: x['id'] < first_fact['id']):
|
||||
first_fact = fact
|
||||
|
||||
resources = traverse_obj(video, ('file', 'hls_resources')) or first_fact.get('hls_resources')
|
||||
|
||||
formats = []
|
||||
for format_id, video_url in video.items():
|
||||
if not video_url:
|
||||
for format_id, video_uri in resources.items():
|
||||
if not video_uri:
|
||||
continue
|
||||
height = self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None)
|
||||
if not height:
|
||||
continue
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(
|
||||
video_url.replace('{DATA_MARKERS}', 'data=pc_XX__%s_0' % beeg_version), 'https:'),
|
||||
'format_id': format_id,
|
||||
'height': int(height),
|
||||
})
|
||||
height = int_or_none(self._search_regex(r'fl_cdn_(\d+)', format_id, 'height', default=None))
|
||||
current_formats = self._extract_m3u8_formats(f'https://video.beeg.com/{video_uri}', video_id, ext='mp4', m3u8_id=str(height))
|
||||
for f in current_formats:
|
||||
f['height'] = height
|
||||
formats.extend(current_formats)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = video['title']
|
||||
video_id = compat_str(video.get('id') or video_id)
|
||||
display_id = video.get('code')
|
||||
description = video.get('desc')
|
||||
series = video.get('ps_name')
|
||||
|
||||
timestamp = unified_timestamp(video.get('date'))
|
||||
duration = int_or_none(video.get('duration'))
|
||||
|
||||
tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'series': series,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'tags': tags,
|
||||
'display_id': first_fact.get('id'),
|
||||
'title': traverse_obj(video, ('file', 'stuff', 'sf_name')),
|
||||
'description': traverse_obj(video, ('file', 'stuff', 'sf_story')),
|
||||
'timestamp': unified_timestamp(first_fact.get('fc_created')),
|
||||
'duration': int_or_none(traverse_obj(video, ('file', 'fl_duration'))),
|
||||
'tags': traverse_obj(video, ('tags', ..., 'tg_name')),
|
||||
'formats': formats,
|
||||
'age_limit': self._rta_search(webpage),
|
||||
}
|
||||
|
@ -239,6 +239,7 @@ class InfoExtractor(object):
|
||||
* "resolution" (optional, string "{width}x{height}",
|
||||
deprecated)
|
||||
* "filesize" (optional, int)
|
||||
* "http_headers" (dict) - HTTP headers for the request
|
||||
thumbnail: Full URL to a video thumbnail image.
|
||||
description: Full video description.
|
||||
uploader: Full name of the video uploader.
|
||||
@ -272,6 +273,8 @@ class InfoExtractor(object):
|
||||
* "url": A URL pointing to the subtitles file
|
||||
It can optionally also have:
|
||||
* "name": Name or description of the subtitles
|
||||
* http_headers: A dictionary of additional HTTP headers
|
||||
to add to the request.
|
||||
"ext" will be calculated from URL if missing
|
||||
automatic_captions: Like 'subtitles'; contains automatically generated
|
||||
captions instead of normal subtitles
|
||||
|
@ -7,16 +7,6 @@ class EngadgetIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?engadget\.com/video/(?P<id>[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# video with 5min ID
|
||||
'url': 'http://www.engadget.com/video/518153925/',
|
||||
'md5': 'c6820d4828a5064447a4d9fc73f312c9',
|
||||
'info_dict': {
|
||||
'id': '518153925',
|
||||
'ext': 'mp4',
|
||||
'title': 'Samsung Galaxy Tab Pro 8.4 Review',
|
||||
},
|
||||
'add_ie': ['FiveMin'],
|
||||
}, {
|
||||
# video with vidible ID
|
||||
'url': 'https://www.engadget.com/video/57a28462134aa15a39f0421a/',
|
||||
'only_matching': True,
|
||||
|
@ -481,7 +481,6 @@ from .filmon import (
|
||||
)
|
||||
from .filmweb import FilmwebIE
|
||||
from .firsttv import FirstTVIE
|
||||
from .fivemin import FiveMinIE
|
||||
from .fivetv import FiveTVIE
|
||||
from .flickr import FlickrIE
|
||||
from .folketinget import FolketingetIE
|
||||
@ -513,7 +512,6 @@ from .francetv import (
|
||||
)
|
||||
from .freesound import FreesoundIE
|
||||
from .freespeech import FreespeechIE
|
||||
from .freshlive import FreshLiveIE
|
||||
from .frontendmasters import (
|
||||
FrontendMastersIE,
|
||||
FrontendMastersLessonIE,
|
||||
@ -548,7 +546,10 @@ from .gazeta import GazetaIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .gedidigital import GediDigitalIE
|
||||
from .generic import GenericIE
|
||||
from .gettr import GettrIE
|
||||
from .gettr import (
|
||||
GettrIE,
|
||||
GettrStreamingIE,
|
||||
)
|
||||
from .gfycat import GfycatIE
|
||||
from .giantbomb import GiantBombIE
|
||||
from .giga import GigaIE
|
||||
@ -585,7 +586,6 @@ from .hidive import HiDiveIE
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
from .hitrecord import HitRecordIE
|
||||
from .hornbunny import HornBunnyIE
|
||||
from .hotnewhiphop import HotNewHipHopIE
|
||||
from .hotstar import (
|
||||
HotStarIE,
|
||||
@ -655,7 +655,6 @@ from .iqiyi import (
|
||||
IqIE,
|
||||
IqAlbumIE
|
||||
)
|
||||
from .ir90tv import Ir90TvIE
|
||||
from .itv import (
|
||||
ITVIE,
|
||||
ITVBTCCIE,
|
||||
@ -677,7 +676,6 @@ from .joj import JojIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .kakao import KakaoIE
|
||||
from .kaltura import KalturaIE
|
||||
from .kankan import KankanIE
|
||||
from .karaoketv import KaraoketvIE
|
||||
from .karrierevideos import KarriereVideosIE
|
||||
from .keezmovies import KeezMoviesIE
|
||||
@ -1320,11 +1318,9 @@ from .reuters import ReutersIE
|
||||
from .reverbnation import ReverbNationIE
|
||||
from .rice import RICEIE
|
||||
from .rmcdecouverte import RMCDecouverteIE
|
||||
from .ro220 import Ro220IE
|
||||
from .rockstargames import RockstarGamesIE
|
||||
from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE
|
||||
from .rottentomatoes import RottenTomatoesIE
|
||||
from .roxwel import RoxwelIE
|
||||
from .rozhlas import RozhlasIE
|
||||
from .rtbf import RTBFIE
|
||||
from .rte import RteIE, RteRadioIE
|
||||
@ -1595,7 +1591,6 @@ from .theplatform import (
|
||||
ThePlatformIE,
|
||||
ThePlatformFeedIE,
|
||||
)
|
||||
from .thescene import TheSceneIE
|
||||
from .thestar import TheStarIE
|
||||
from .thesun import TheSunIE
|
||||
from .theta import (
|
||||
@ -1900,7 +1895,6 @@ from .vrv import (
|
||||
from .vshare import VShareIE
|
||||
from .vtm import VTMIE
|
||||
from .medialaan import MedialaanIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .vupload import VuploadIE
|
||||
from .vvvvid import (
|
||||
|
@ -1,54 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class FiveMinIE(InfoExtractor):
|
||||
IE_NAME = '5min'
|
||||
_VALID_URL = r'(?:5min:|https?://(?:[^/]*?5min\.com/|delivery\.vidible\.tv/aol)(?:(?:Scripts/PlayerSeed\.js|playerseed/?)?\?.*?playList=)?)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
# From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
|
||||
'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
|
||||
'md5': '4f7b0b79bf1a470e5004f7112385941d',
|
||||
'info_dict': {
|
||||
'id': '518013791',
|
||||
'ext': 'mp4',
|
||||
'title': 'iPad Mini with Retina Display Review',
|
||||
'description': 'iPad mini with Retina Display review',
|
||||
'duration': 177,
|
||||
'uploader': 'engadget',
|
||||
'upload_date': '20131115',
|
||||
'timestamp': 1384515288,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
{
|
||||
# From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247
|
||||
'url': '5min:518086247',
|
||||
'md5': 'e539a9dd682c288ef5a498898009f69e',
|
||||
'info_dict': {
|
||||
'id': '518086247',
|
||||
'ext': 'mp4',
|
||||
'title': 'How to Make a Next-Level Fruit Salad',
|
||||
'duration': 184,
|
||||
},
|
||||
'skip': 'no longer available',
|
||||
},
|
||||
{
|
||||
'url': 'http://embed.5min.com/518726732/',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://delivery.vidible.tv/aol?playList=518013791',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result('aol-video:%s' % video_id)
|
@ -1,80 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class FreshLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://freshlive\.tv/[^/]+/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://freshlive.tv/satotv/74712',
|
||||
'md5': '9f0cf5516979c4454ce982df3d97f352',
|
||||
'info_dict': {
|
||||
'id': '74712',
|
||||
'ext': 'mp4',
|
||||
'title': 'テスト',
|
||||
'description': 'テスト',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1511,
|
||||
'timestamp': 1483619655,
|
||||
'upload_date': '20170105',
|
||||
'uploader': 'サトTV',
|
||||
'uploader_id': 'satotv',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'is_live': False,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
options = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.__CONTEXT__\s*=\s*({.+?});\s*</script>',
|
||||
webpage, 'initial context'),
|
||||
video_id)
|
||||
|
||||
info = options['context']['dispatcher']['stores']['ProgramStore']['programs'][video_id]
|
||||
|
||||
title = info['title']
|
||||
|
||||
if info.get('status') == 'upcoming':
|
||||
raise ExtractorError('Stream %s is upcoming' % video_id, expected=True)
|
||||
|
||||
stream_url = info.get('liveStreamUrl') or info['archiveStreamUrl']
|
||||
|
||||
is_live = info.get('liveStreamUrl') is not None
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
stream_url, video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': info.get('description'),
|
||||
'thumbnail': info.get('thumbnailUrl'),
|
||||
'duration': int_or_none(info.get('airTime')),
|
||||
'timestamp': unified_timestamp(info.get('createdAt')),
|
||||
'uploader': try_get(
|
||||
info, lambda x: x['channel']['title'], compat_str),
|
||||
'uploader_id': try_get(
|
||||
info, lambda x: x['channel']['code'], compat_str),
|
||||
'uploader_url': try_get(
|
||||
info, lambda x: x['channel']['permalink'], compat_str),
|
||||
'view_count': int_or_none(info.get('viewCount')),
|
||||
'comment_count': int_or_none(info.get('commentCount')),
|
||||
'tags': info.get('tags', []),
|
||||
'is_live': is_live,
|
||||
}
|
@ -1456,24 +1456,6 @@ class GenericIE(InfoExtractor):
|
||||
'duration': 45.115,
|
||||
},
|
||||
},
|
||||
# 5min embed
|
||||
{
|
||||
'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
|
||||
'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
|
||||
'info_dict': {
|
||||
'id': '518726732',
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook Creates "On This Day" | Crunch Report',
|
||||
'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild',
|
||||
'timestamp': 1427237531,
|
||||
'uploader': 'Crunch Report',
|
||||
'upload_date': '20150324',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# Crooks and Liars embed
|
||||
{
|
||||
'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
|
||||
@ -3337,12 +3319,6 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for 5min embeds
|
||||
mobj = re.search(
|
||||
r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
|
||||
|
||||
# Look for Crooks and Liars embeds
|
||||
mobj = re.search(
|
||||
r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
|
||||
|
@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
bool_or_none,
|
||||
ExtractorError,
|
||||
dict_get,
|
||||
float_or_none,
|
||||
@ -15,10 +16,17 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class GettrIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(www\.)?gettr\.com/post/(?P<id>[a-z0-9]+)'
|
||||
class GettrBaseIE(InfoExtractor):
|
||||
_BASE_REGEX = r'https?://(www\.)?gettr\.com/'
|
||||
_MEDIA_BASE_URL = 'https://media.gettr.com/'
|
||||
|
||||
def _call_api(self, path, video_id, *args, **kwargs):
|
||||
return self._download_json(urljoin('https://api.gettr.com/u/', path), video_id, *args, **kwargs)['result']
|
||||
|
||||
|
||||
class GettrIE(GettrBaseIE):
|
||||
_VALID_URL = GettrBaseIE._BASE_REGEX + r'post/(?P<id>[a-z0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.gettr.com/post/pcf6uv838f',
|
||||
'info_dict': {
|
||||
@ -51,11 +59,10 @@ class GettrIE(InfoExtractor):
|
||||
post_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, post_id)
|
||||
|
||||
api_data = self._download_json(
|
||||
'https://api.gettr.com/u/post/%s?incl="poststats|userinfo"' % post_id, post_id)
|
||||
api_data = self._call_api('post/%s?incl="poststats|userinfo"' % post_id, post_id)
|
||||
|
||||
post_data = try_get(api_data, lambda x: x['result']['data'])
|
||||
user_data = try_get(api_data, lambda x: x['result']['aux']['uinf'][post_data['uid']]) or {}
|
||||
post_data = api_data.get('data')
|
||||
user_data = try_get(api_data, lambda x: x['aux']['uinf'][post_data['uid']]) or {}
|
||||
|
||||
if post_data.get('nfound'):
|
||||
raise ExtractorError(post_data.get('txt'), expected=True)
|
||||
@ -108,3 +115,71 @@ class GettrIE(InfoExtractor):
|
||||
'duration': float_or_none(post_data.get('vid_dur')),
|
||||
'tags': post_data.get('htgs'),
|
||||
}
|
||||
|
||||
|
||||
class GettrStreamingIE(GettrBaseIE):
|
||||
_VALID_URL = GettrBaseIE._BASE_REGEX + r'streaming/(?P<id>[a-z0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://gettr.com/streaming/psoiulc122',
|
||||
'info_dict': {
|
||||
'id': 'psoiulc122',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:56bca4b8f48f1743d9fd03d49c723017',
|
||||
'view_count': int,
|
||||
'uploader': 'Corona Investigative Committee',
|
||||
'uploader_id': 'coronacommittee',
|
||||
'duration': 5180.184,
|
||||
'thumbnail': r're:^https?://.+',
|
||||
'title': 'Day 1: Opening Session of the Grand Jury Proceeding',
|
||||
'timestamp': 1644080997.164,
|
||||
'upload_date': '20220205',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://gettr.com/streaming/psfmeefcc1',
|
||||
'info_dict': {
|
||||
'id': 'psfmeefcc1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Session 90: "The Virus Of Power"',
|
||||
'view_count': int,
|
||||
'uploader_id': 'coronacommittee',
|
||||
'description': 'md5:98986acdf656aa836bf36f9c9704c65b',
|
||||
'uploader': 'Corona Investigative Committee',
|
||||
'thumbnail': r're:^https?://.+',
|
||||
'duration': 21872.507,
|
||||
'timestamp': 1643976662.858,
|
||||
'upload_date': '20220204',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_info = self._call_api('live/join/%s' % video_id, video_id, data={})
|
||||
|
||||
live_info = video_info['broadcast']
|
||||
live_url = url_or_none(live_info.get('url'))
|
||||
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
live_url, video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) if live_url else ([], {})
|
||||
|
||||
thumbnails = [{
|
||||
'url': urljoin(self._MEDIA_BASE_URL, thumbnail),
|
||||
} for thumbnail in try_get(video_info, lambda x: x['postData']['imgs']) or []]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': try_get(video_info, lambda x: x['postData']['ttl']),
|
||||
'description': try_get(video_info, lambda x: x['postData']['dsc']),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnails': thumbnails,
|
||||
'uploader': try_get(video_info, lambda x: x['liveHostInfo']['nickname']),
|
||||
'uploader_id': try_get(video_info, lambda x: x['liveHostInfo']['_id']),
|
||||
'view_count': int_or_none(live_info.get('viewsCount')),
|
||||
'timestamp': float_or_none(live_info.get('startAt'), scale=1000),
|
||||
'duration': float_or_none(live_info.get('duration'), scale=1000),
|
||||
'is_live': bool_or_none(live_info.get('isLive')),
|
||||
}
|
||||
|
@ -139,11 +139,11 @@ class GloboIE(InfoExtractor):
|
||||
resource_url = source['scheme'] + '://' + source['domain'] + source['path']
|
||||
signed_url = '%s?h=%s&k=html5&a=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A')
|
||||
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
signed_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||
fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
signed_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for resource in video['resources']:
|
||||
if resource.get('type') == 'subtitle':
|
||||
subtitles.setdefault(resource.get('language') or 'por', []).append({
|
||||
@ -186,6 +186,7 @@ class GloboArticleIE(InfoExtractor):
|
||||
r'\bvideosIDs\s*:\s*["\']?(\d{7,})',
|
||||
r'\bdata-id=["\'](\d{7,})',
|
||||
r'<div[^>]+\bid=["\'](\d{7,})',
|
||||
r'<bs-player[^>]+\bvideoid=["\'](\d{8,})',
|
||||
]
|
||||
|
||||
_TESTS = [{
|
||||
@ -213,6 +214,14 @@ class GloboArticleIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://oglobo.globo.com/rio/a-amizade-entre-um-entregador-de-farmacia-um-piano-19946271',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://ge.globo.com/video/ta-na-area-como-foi-assistir-ao-jogo-do-palmeiras-que-a-globo-nao-passou-10287094.ghtml',
|
||||
'info_dict': {
|
||||
'id': 'ta-na-area-como-foi-assistir-ao-jogo-do-palmeiras-que-a-globo-nao-passou-10287094',
|
||||
'title': 'Tá na Área: como foi assistir ao jogo do Palmeiras que a Globo não passou',
|
||||
'description': 'md5:2d089d036c4c9675117d3a56f8c61739',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@ -228,6 +237,6 @@ class GloboArticleIE(InfoExtractor):
|
||||
entries = [
|
||||
self.url_result('globo:%s' % video_id, GloboIE.ie_key())
|
||||
for video_id in orderedSet(video_ids)]
|
||||
title = self._og_search_title(webpage, fatal=False)
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._html_search_meta('description', webpage)
|
||||
return self.playlist_result(entries, display_id, title, description)
|
||||
|
@ -1,49 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class HornBunnyIE(InfoExtractor):
|
||||
_VALID_URL = r'http?://(?:www\.)?hornbunny\.com/videos/(?P<title_dash>[a-z-]+)-(?P<id>\d+)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://hornbunny.com/videos/panty-slut-jerk-off-instruction-5227.html',
|
||||
'md5': 'e20fd862d1894b67564c96f180f43924',
|
||||
'info_dict': {
|
||||
'id': '5227',
|
||||
'ext': 'mp4',
|
||||
'title': 'panty slut jerk off instruction',
|
||||
'duration': 550,
|
||||
'age_limit': 18,
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._og_search_title(webpage)
|
||||
info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'<strong>Runtime:</strong>\s*([0-9:]+)</div>',
|
||||
webpage, 'duration', fatal=False))
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'<strong>Views:</strong>\s*(\d+)</div>',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'age_limit': 18,
|
||||
})
|
||||
|
||||
return info_dict
|
@ -80,9 +80,6 @@ class HuffPostIE(InfoExtractor):
|
||||
'vcodec': 'none' if key.startswith('audio/') else None,
|
||||
})
|
||||
|
||||
if not formats and data.get('fivemin_id'):
|
||||
return self.url_result('5min:%s' % data['fivemin_id'])
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@ -1,42 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_start
|
||||
|
||||
|
||||
class Ir90TvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?90tv\.ir/video/(?P<id>[0-9]+)/.*'
|
||||
_TESTS = [{
|
||||
'url': 'http://90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218',
|
||||
'md5': '411dbd94891381960cb9e13daa47a869',
|
||||
'info_dict': {
|
||||
'id': '95719',
|
||||
'ext': 'mp4',
|
||||
'title': 'شایعات نقل و انتقالات مهم فوتبال اروپا 94/02/18',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = remove_start(self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title'), '90tv.ir :: ')
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'<source[^>]+src="([^"]+)"', webpage, 'video url')
|
||||
|
||||
thumbnail = self._search_regex(r'poster="([^"]+)"', webpage, 'thumbnail url', fatal=False)
|
||||
|
||||
return {
|
||||
'url': video_url,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'video_url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
@ -301,6 +301,7 @@ class KalturaIE(InfoExtractor):
|
||||
data_url = re.sub(r'/flvclipper/.*', '/serveFlavor', data_url)
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for f in flavor_assets:
|
||||
# Continue if asset is not ready
|
||||
if f.get('status') != 2:
|
||||
@ -344,13 +345,14 @@ class KalturaIE(InfoExtractor):
|
||||
if '/playManifest/' in data_url:
|
||||
m3u8_url = sign_url(data_url.replace(
|
||||
'format/url', 'format/applehttp'))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url, entry_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
if captions:
|
||||
for caption in captions.get('objects', []):
|
||||
# Continue if caption is not ready
|
||||
|
@ -1,48 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
_md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
|
||||
|
||||
|
||||
class KankanIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://yinyue.kankan.com/vod/48/48863.shtml',
|
||||
'md5': '29aca1e47ae68fc28804aca89f29507e',
|
||||
'info_dict': {
|
||||
'id': '48863',
|
||||
'ext': 'flv',
|
||||
'title': 'Ready To Go',
|
||||
},
|
||||
'skip': 'Only available from China',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title')
|
||||
surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0)
|
||||
gcids = re.findall(r'http://.+?/.+?/(.+?)/', surls)
|
||||
gcid = gcids[-1]
|
||||
|
||||
info_url = 'http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid
|
||||
video_info_page = self._download_webpage(
|
||||
info_url, video_id, 'Downloading video url info')
|
||||
ip = self._search_regex(r'ip:"(.+?)"', video_info_page, 'video url ip')
|
||||
path = self._search_regex(r'path:"(.+?)"', video_info_page, 'video url path')
|
||||
param1 = self._search_regex(r'param1:(\d+)', video_info_page, 'param1')
|
||||
param2 = self._search_regex(r'param2:(\d+)', video_info_page, 'param2')
|
||||
key = _md5('xl_mp43651' + param1 + param2)
|
||||
video_url = 'http://%s%s?key=%s&key1=%s' % (ip, path, key, param2)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
}
|
@ -1,43 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
|
||||
|
||||
class Ro220IE(InfoExtractor):
|
||||
IE_NAME = '220.ro'
|
||||
_VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/',
|
||||
'md5': '03af18b73a07b4088753930db7a34add',
|
||||
'info_dict': {
|
||||
'id': 'LYV6doKo7f',
|
||||
'ext': 'mp4',
|
||||
'title': 'Luati-le Banii sez 4 ep 1',
|
||||
'description': r're:^Iata-ne reveniti dupa o binemeritata vacanta\. +Va astept si pe Facebook cu pareri si comentarii.$',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
url = compat_urllib_parse_unquote(self._search_regex(
|
||||
r'(?s)clip\s*:\s*{.*?url\s*:\s*\'([^\']+)\'', webpage, 'url'))
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
'url': url,
|
||||
'ext': 'mp4',
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
@ -1,52 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate, determine_ext
|
||||
|
||||
|
||||
class RoxwelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.roxwel.com/player/passionpittakeawalklive.html',
|
||||
'info_dict': {
|
||||
'id': 'passionpittakeawalklive',
|
||||
'ext': 'flv',
|
||||
'title': 'Take A Walk (live)',
|
||||
'uploader': 'Passion Pit',
|
||||
'uploader_id': 'passionpit',
|
||||
'upload_date': '20120928',
|
||||
'description': 'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
filename = mobj.group('filename')
|
||||
info_url = 'http://www.roxwel.com/api/videos/%s' % filename
|
||||
info = self._download_json(info_url, filename)
|
||||
|
||||
rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')])
|
||||
best_rate = rtmp_rates[-1]
|
||||
url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate)
|
||||
rtmp_url = self._download_webpage(url_page_url, filename, 'Downloading video url')
|
||||
ext = determine_ext(rtmp_url)
|
||||
if ext == 'f4v':
|
||||
rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename)
|
||||
|
||||
return {
|
||||
'id': filename,
|
||||
'title': info['title'],
|
||||
'url': rtmp_url,
|
||||
'ext': 'flv',
|
||||
'description': info['description'],
|
||||
'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
|
||||
'uploader': info['artist'],
|
||||
'uploader_id': info['artistname'],
|
||||
'upload_date': unified_strdate(info['dbdate']),
|
||||
}
|
@ -23,23 +23,27 @@ class SVTBaseIE(InfoExtractor):
|
||||
is_live = dict_get(video_info, ('live', 'simulcast'), default=False)
|
||||
m3u8_protocol = 'm3u8' if is_live else 'm3u8_native'
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for vr in video_info['videoReferences']:
|
||||
player_type = vr.get('playerType') or vr.get('format')
|
||||
vurl = vr['url']
|
||||
ext = determine_ext(vurl)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
vurl, video_id,
|
||||
ext='mp4', entry_protocol=m3u8_protocol,
|
||||
m3u8_id=player_type, fatal=False))
|
||||
m3u8_id=player_type, fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
vurl + '?hdcore=3.3.0', video_id,
|
||||
f4m_id=player_type, fatal=False))
|
||||
elif ext == 'mpd':
|
||||
if player_type == 'dashhbbtv':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
vurl, video_id, mpd_id=player_type, fatal=False))
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
vurl, video_id, mpd_id=player_type, fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': player_type,
|
||||
@ -52,18 +56,19 @@ class SVTBaseIE(InfoExtractor):
|
||||
countries=self._GEO_COUNTRIES, metadata_available=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences'))
|
||||
if isinstance(subtitle_references, list):
|
||||
for sr in subtitle_references:
|
||||
subtitle_url = sr.get('url')
|
||||
subtitle_lang = sr.get('language', 'sv')
|
||||
if subtitle_url:
|
||||
sub = {
|
||||
'url': subtitle_url,
|
||||
}
|
||||
if determine_ext(subtitle_url) == 'm3u8':
|
||||
# TODO(yan12125): handle WebVTT in m3u8 manifests
|
||||
continue
|
||||
|
||||
subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url})
|
||||
# XXX: no way of testing, is it ever hit?
|
||||
sub['ext'] = 'vtt'
|
||||
subtitles.setdefault(subtitle_lang, []).append(sub)
|
||||
|
||||
title = video_info.get('title')
|
||||
|
||||
|
@ -1,44 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
|
||||
class TheSceneIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://thescene\.com/watch/[^/]+/(?P<id>[^/#?]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://thescene.com/watch/vogue/narciso-rodriguez-spring-2013-ready-to-wear',
|
||||
'info_dict': {
|
||||
'id': '520e8faac2b4c00e3c6e5f43',
|
||||
'ext': 'mp4',
|
||||
'title': 'Narciso Rodriguez: Spring 2013 Ready-to-Wear',
|
||||
'display_id': 'narciso-rodriguez-spring-2013-ready-to-wear',
|
||||
'duration': 127,
|
||||
'series': 'Style.com Fashion Shows',
|
||||
'season': 'Ready To Wear Spring 2013',
|
||||
'tags': list,
|
||||
'categories': list,
|
||||
'upload_date': '20120913',
|
||||
'timestamp': 1347512400,
|
||||
'uploader': 'vogue',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
player_url = compat_urlparse.urljoin(
|
||||
url,
|
||||
self._html_search_regex(
|
||||
r'id=\'js-player-script\'[^>]+src=\'(.+?)\'', webpage, 'player url'))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'display_id': display_id,
|
||||
'url': player_url,
|
||||
'ie_key': 'CondeNast',
|
||||
}
|
@ -131,6 +131,8 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
request = config.get('request') or {}
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
|
||||
config_files = video_data.get('files') or request.get('files') or {}
|
||||
for f in (config_files.get('progressive') or []):
|
||||
video_url = f.get('url')
|
||||
@ -163,21 +165,24 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
sep_manifest_urls = [(format_id, manifest_url)]
|
||||
for f_id, m_url in sep_manifest_urls:
|
||||
if files_type == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
m_url, video_id, 'mp4',
|
||||
'm3u8' if is_live else 'm3u8_native', m3u8_id=f_id,
|
||||
note='Downloading %s m3u8 information' % cdn_name,
|
||||
fatal=False))
|
||||
fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
elif files_type == 'dash':
|
||||
if 'json=1' in m_url:
|
||||
real_m_url = (self._download_json(m_url, video_id, fatal=False) or {}).get('url')
|
||||
if real_m_url:
|
||||
m_url = real_m_url
|
||||
mpd_formats = self._extract_mpd_formats(
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
|
||||
'Downloading %s MPD information' % cdn_name,
|
||||
fatal=False)
|
||||
formats.extend(mpd_formats)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
live_archive = live_event.get('archive') or {}
|
||||
live_archive_source_url = live_archive.get('source_url')
|
||||
@ -188,12 +193,11 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
'quality': 10,
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
for tt in (request.get('text_tracks') or []):
|
||||
subtitles[tt['lang']] = [{
|
||||
subtitles.setdefault(tt['lang'], []).append({
|
||||
'ext': 'vtt',
|
||||
'url': urljoin('https://vimeo.com', tt['url']),
|
||||
}]
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
if not is_live:
|
||||
|
@ -1,170 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class VubeIE(InfoExtractor):
|
||||
IE_NAME = 'vube'
|
||||
IE_DESC = 'Vube.com'
|
||||
_VALID_URL = r'https?://vube\.com/(?:[^/]+/)+(?P<id>[\da-zA-Z]{10})\b'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://vube.com/trending/William+Wei/Y8NUZ69Tf7?t=s',
|
||||
'md5': 'e7aabe1f8f1aa826b9e4735e1f9cee42',
|
||||
'info_dict': {
|
||||
'id': 'Y8NUZ69Tf7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Best Drummer Ever [HD]',
|
||||
'description': 'md5:2d63c4b277b85c2277761c2cf7337d71',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'uploader': 'William',
|
||||
'timestamp': 1406876915,
|
||||
'upload_date': '20140801',
|
||||
'duration': 258.051,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'categories': ['amazing', 'hd', 'best drummer ever', 'william wei', 'bucket drumming', 'street drummer', 'epic street drumming'],
|
||||
},
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
}, {
|
||||
'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
|
||||
'md5': 'db7aba89d4603dadd627e9d1973946fe',
|
||||
'info_dict': {
|
||||
'id': 'YL2qNPkqon',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chiara Grispo - Price Tag by Jessie J',
|
||||
'description': 'md5:8ea652a1f36818352428cb5134933313',
|
||||
'thumbnail': r're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f\.jpg$',
|
||||
'uploader': 'Chiara.Grispo',
|
||||
'timestamp': 1388743358,
|
||||
'upload_date': '20140103',
|
||||
'duration': 170.56,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'categories': ['pop', 'music', 'cover', 'singing', 'jessie j', 'price tag', 'chiara grispo'],
|
||||
},
|
||||
'skip': 'Removed due to DMCA',
|
||||
},
|
||||
{
|
||||
'url': 'http://vube.com/SerainaMusic/my-7-year-old-sister-and-i-singing-alive-by-krewella/UeBhTudbfS?t=s&n=1',
|
||||
'md5': '5d4a52492d76f72712117ce6b0d98d08',
|
||||
'info_dict': {
|
||||
'id': 'UeBhTudbfS',
|
||||
'ext': 'mp4',
|
||||
'title': 'My 7 year old Sister and I singing "Alive" by Krewella',
|
||||
'description': 'md5:40bcacb97796339f1690642c21d56f4a',
|
||||
'thumbnail': r're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102265d5a9f-0f17-4f6b-5753-adf08484ee1e\.jpg$',
|
||||
'uploader': 'Seraina',
|
||||
'timestamp': 1396492438,
|
||||
'upload_date': '20140403',
|
||||
'duration': 240.107,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'categories': ['seraina', 'jessica', 'krewella', 'alive'],
|
||||
},
|
||||
'skip': 'Removed due to DMCA',
|
||||
}, {
|
||||
'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s',
|
||||
'md5': '0584fc13b50f887127d9d1007589d27f',
|
||||
'info_dict': {
|
||||
'id': '0nmsMY5vEq',
|
||||
'ext': 'mp4',
|
||||
'title': 'Frozen - Let It Go Cover by Siren Gene',
|
||||
'description': 'My rendition of "Let It Go" originally sung by Idina Menzel.',
|
||||
'thumbnail': r're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/10283ab622a-86c9-4681-51f2-30d1f65774af\.jpg$',
|
||||
'uploader': 'Siren',
|
||||
'timestamp': 1395448018,
|
||||
'upload_date': '20140322',
|
||||
'duration': 221.788,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'categories': ['let it go', 'cover', 'idina menzel', 'frozen', 'singing', 'disney', 'siren gene'],
|
||||
},
|
||||
'skip': 'Removed due to DMCA',
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video = self._download_json(
|
||||
'http://vube.com/t-api/v1/video/%s' % video_id, video_id, 'Downloading video JSON')
|
||||
|
||||
public_id = video['public_id']
|
||||
|
||||
formats = []
|
||||
|
||||
for media in video['media'].get('video', []) + video['media'].get('audio', []):
|
||||
if media['transcoding_status'] != 'processed':
|
||||
continue
|
||||
fmt = {
|
||||
'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (media['media_resolution_id'], public_id),
|
||||
'abr': int(media['audio_bitrate']),
|
||||
'format_id': compat_str(media['media_resolution_id']),
|
||||
}
|
||||
vbr = int(media['video_bitrate'])
|
||||
if vbr:
|
||||
fmt.update({
|
||||
'vbr': vbr,
|
||||
'height': int(media['height']),
|
||||
})
|
||||
formats.append(fmt)
|
||||
|
||||
if not formats and video.get('vst') == 'dmca':
|
||||
self.raise_no_formats(
|
||||
'This video has been removed in response to a complaint received under the US Digital Millennium Copyright Act.',
|
||||
expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = video['title']
|
||||
description = video.get('description')
|
||||
thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:')
|
||||
uploader = video.get('user_alias') or video.get('channel')
|
||||
timestamp = int_or_none(video.get('upload_time'))
|
||||
duration = video['duration']
|
||||
view_count = video.get('raw_view_count')
|
||||
like_count = video.get('total_likes')
|
||||
dislike_count = video.get('total_hates')
|
||||
|
||||
comments = video.get('comments')
|
||||
comment_count = None
|
||||
if comments is None:
|
||||
comment_data = self._download_json(
|
||||
'http://vube.com/api/video/%s/comment' % video_id,
|
||||
video_id, 'Downloading video comment JSON', fatal=False)
|
||||
if comment_data is not None:
|
||||
comment_count = int_or_none(comment_data.get('total'))
|
||||
else:
|
||||
comment_count = len(comments)
|
||||
|
||||
categories = [tag['text'] for tag in video['tags']]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'comment_count': comment_count,
|
||||
'categories': categories,
|
||||
}
|
@ -3006,13 +3006,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
tbr = float_or_none(
|
||||
fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
|
||||
language_preference = (
|
||||
10 if audio_track.get('audioIsDefault') and 10
|
||||
else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
|
||||
else -1)
|
||||
dct = {
|
||||
'asr': int_or_none(fmt.get('audioSampleRate')),
|
||||
'filesize': int_or_none(fmt.get('contentLength')),
|
||||
'format_id': itag,
|
||||
'format_note': join_nonempty(
|
||||
'%s%s' % (audio_track.get('displayName') or '',
|
||||
' (default)' if audio_track.get('audioIsDefault') else ''),
|
||||
' (default)' if language_preference > 0 else ''),
|
||||
fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
|
||||
throttled and 'THROTTLED', delim=', '),
|
||||
'source_preference': -10 if throttled else -1,
|
||||
@ -3022,8 +3026,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'tbr': tbr,
|
||||
'url': fmt_url,
|
||||
'width': int_or_none(fmt.get('width')),
|
||||
'language': audio_track.get('id', '').split('.')[0],
|
||||
'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
|
||||
'language': join_nonempty(audio_track.get('id', '').split('.')[0],
|
||||
'desc' if language_preference < -1 else ''),
|
||||
'language_preference': language_preference,
|
||||
}
|
||||
mime_mobj = re.match(
|
||||
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
|
||||
@ -3408,6 +3413,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
trans_name += format_field(lang_name, template=' from %s')
|
||||
process_language(
|
||||
automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
|
||||
if lang_code == f'a-{trans_code}':
|
||||
process_language(
|
||||
automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {'tlang': trans_code})
|
||||
info['automatic_captions'] = automatic_captions
|
||||
info['subtitles'] = subtitles
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user