Compare commits

...

6 Commits

Author SHA1 Message Date
pukkandan
f1d130902b
[utils] OnDemandPagedList: Do not download pages after error
2022-03-03 02:43:16 +05:30
pukkandan
c2ae48dbd5
[manyvids] Extract uploader (#2913)
Authored by: regarten
2022-03-03 01:21:05 +05:30
pukkandan
a5c0c20252
[cleanup] Don't pass protocol to _extract_m3u8_formats for live videos
`live` parameter already handles changing the protocol
2022-03-02 22:59:48 +05:30
Lesmiscore (Naoya Ozaki)
f494ddada8
[niconico] Add playlist extractors and refactor (#2915)
Authored by: Lesmiscore
2022-03-03 02:08:13 +09:00
Lesmiscore (Naoya Ozaki)
02fc6feb6e
[mirrativ] Cleanup extractor code (#2925)
Authored by: Lesmiscore
2022-03-03 02:06:34 +09:00
pukkandan
7eaf7f9aba
[rokfin] Add stack and channel extractors (#1534)
Authored by: P-reducible, pukkandan
2022-03-02 21:39:08 +05:30
15 changed files with 320 additions and 114 deletions

View File: README.md

@@ -1698,6 +1698,10 @@ The following extractors use this feature:
 * `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`. (e.g. `20.2.1`)
 * `manifest_app_version`: Numeric app version to call mobile APIs with. (e.g. `221`)
+#### rokfinchannel
+* `tab`: Which tab to download. One of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`. (E.g. `rokfinchannel:tab=streams`)
 NOTE: These options may be changed/removed in the future without concern for backward compatibility
 <!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
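Like the other extractor-args documented in this README section, the new `tab` option can be passed on the command line (`yt-dlp --extractor-args "rokfinchannel:tab=streams" URL`) or through the embedding API. A minimal sketch of the latter; the channel URL is a placeholder:

    import yt_dlp

    # 'extractor_args' maps a lowercase IE name to {arg: list of values},
    # mirroring the rokfinchannel:tab=streams CLI syntax documented above
    opts = {'extractor_args': {'rokfinchannel': {'tab': ['streams']}}}
    with yt_dlp.YoutubeDL(opts) as ydl:
        ydl.download(['https://rokfin.com/SomeChannel'])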

View File: yt_dlp/extractor/arcpublishing.py

@@ -124,8 +124,7 @@ class ArcPublishingIE(InfoExtractor):
                 formats.extend(smil_formats)
             elif stream_type in ('ts', 'hls'):
                 m3u8_formats = self._extract_m3u8_formats(
-                    s_url, uuid, 'mp4', 'm3u8' if is_live else 'm3u8_native',
-                    m3u8_id='hls', fatal=False)
+                    s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
                 if all([f.get('acodec') == 'none' for f in m3u8_formats]):
                     continue
                 for f in m3u8_formats:
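The same mechanical change recurs in several extractors below (dailymotion, imggaming, threeqsdn, tv2, tvnet, vgtv, vimeo): per the cleanup commit in the list above, the `live` parameter already makes `_extract_m3u8_formats` pick the entry protocol internally, so call sites no longer compute it themselves. A schematic before/after (names are illustrative):

    # before: caller picks the protocol by hand
    fmts = self._extract_m3u8_formats(
        m3u8_url, video_id, 'mp4', 'm3u8' if is_live else 'm3u8_native',
        m3u8_id='hls', fatal=False)

    # after: the helper derives the protocol from the live flag
    fmts = self._extract_m3u8_formats(
        m3u8_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False)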

View File: yt_dlp/extractor/dailymotion.py

@@ -259,9 +259,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
                     continue
                 if media_type == 'application/x-mpegURL':
                     formats.extend(self._extract_m3u8_formats(
-                        media_url, video_id, 'mp4',
-                        'm3u8' if is_live else 'm3u8_native',
-                        m3u8_id='hls', fatal=False))
+                        media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False))
                 else:
                     f = {
                         'url': media_url,

View File: yt_dlp/extractor/extractors.py

@@ -1011,11 +1011,12 @@ from .nick import (
     NickNightIE,
     NickRuIE,
 )
 from .niconico import (
     NiconicoIE,
     NiconicoPlaylistIE,
     NiconicoUserIE,
+    NiconicoSeriesIE,
+    NiconicoHistoryIE,
     NicovideoSearchDateIE,
     NicovideoSearchIE,
     NicovideoSearchURLIE,
@@ -1333,7 +1334,11 @@ from .reverbnation import ReverbNationIE
 from .rice import RICEIE
 from .rmcdecouverte import RMCDecouverteIE
 from .rockstargames import RockstarGamesIE
-from .rokfin import RokfinIE
+from .rokfin import (
+    RokfinIE,
+    RokfinStackIE,
+    RokfinChannelIE,
+)
 from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE
 from .rottentomatoes import RottenTomatoesIE
 from .rozhlas import RozhlasIE

View File: yt_dlp/extractor/imggaming.py

@@ -96,7 +96,7 @@ class ImgGamingBaseIE(InfoExtractor):
                 continue
             if proto == 'hls':
                 m3u8_formats = self._extract_m3u8_formats(
-                    media_url, media_id, 'mp4', 'm3u8' if is_live else 'm3u8_native',
+                    media_url, media_id, 'mp4', live=is_live,
                     m3u8_id='hls', fatal=False, headers=self._MANIFEST_HEADERS)
                 for f in m3u8_formats:
                     f.setdefault('http_headers', {}).update(self._MANIFEST_HEADERS)

View File: yt_dlp/extractor/manyvids.py

@@ -89,4 +89,5 @@ class ManyVidsIE(InfoExtractor):
             'view_count': view_count,
             'like_count': like_count,
             'formats': formats,
+            'uploader': self._html_search_regex(r'<meta[^>]+name="author"[^>]*>([^<]+)', webpage, 'uploader'),
         }

View File: yt_dlp/extractor/mirrativ.py

@@ -19,9 +19,25 @@ class MirrativBaseIE(InfoExtractor):
 class MirrativIE(MirrativBaseIE):
     IE_NAME = 'mirrativ'
     _VALID_URL = r'https?://(?:www\.)?mirrativ\.com/live/(?P<id>[^/?#&]+)'
-    LIVE_API_URL = 'https://www.mirrativ.com/api/live/live?live_id=%s'

     TESTS = [{
+        'url': 'https://mirrativ.com/live/UQomuS7EMgHoxRHjEhNiHw',
+        'info_dict': {
+            'id': 'UQomuS7EMgHoxRHjEhNiHw',
+            'title': 'ねむいぃ、。『参加型』🔰jcが初めてやるCOD✨初見さん大歓迎💗',
+            'is_live': True,
+            'description': 'md5:bfcd8f77f2fab24c3c672e5620f3f16e',
+            'thumbnail': r're:https?://.+',
+            'uploader': '# あ ち ゅ 。💡',
+            'uploader_id': '118572165',
+            'duration': None,
+            'view_count': 1241,
+            'release_timestamp': 1646229192,
+            'timestamp': 1646229167,
+            'was_live': False,
+        },
+        'skip': 'livestream',
+    }, {
         'url': 'https://mirrativ.com/live/POxyuG1KmW2982lqlDTuPw',
         'only_matching': True,
     }]
@@ -29,12 +45,11 @@ class MirrativIE(MirrativBaseIE):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage('https://www.mirrativ.com/live/%s' % video_id, video_id)
-        live_response = self._download_json(self.LIVE_API_URL % video_id, video_id)
+        live_response = self._download_json(f'https://www.mirrativ.com/api/live/live?live_id={video_id}', video_id)
         self.assert_error(live_response)

         hls_url = dict_get(live_response, ('archive_url_hls', 'streaming_url_hls'))
         is_live = bool(live_response.get('is_live'))
-        was_live = bool(live_response.get('is_archive'))
         if not hls_url:
             raise ExtractorError('Neither archive nor live is available.', expected=True)
@@ -42,55 +57,29 @@ class MirrativIE(MirrativBaseIE):
             hls_url, video_id,
             ext='mp4', entry_protocol='m3u8_native',
             m3u8_id='hls', live=is_live)
-        rtmp_url = live_response.get('streaming_url_edge')
-        if rtmp_url:
-            keys_to_copy = ('width', 'height', 'vcodec', 'acodec', 'tbr')
-            fmt = {
-                'format_id': 'rtmp',
-                'url': rtmp_url,
-                'protocol': 'rtmp',
-                'ext': 'mp4',
-            }
-            fmt.update({k: traverse_obj(formats, (0, k)) for k in keys_to_copy})
-            formats.append(fmt)
         self._sort_formats(formats)

-        title = self._og_search_title(webpage, default=None) or self._search_regex(
-            r'<title>\s*(.+?) - Mirrativ\s*</title>', webpage) or live_response.get('title')
-        description = live_response.get('description')
-        thumbnail = live_response.get('image_url')
-        duration = try_get(live_response, lambda x: x['ended_at'] - x['started_at'])
-        view_count = live_response.get('total_viewer_num')
-        release_timestamp = live_response.get('started_at')
-        timestamp = live_response.get('created_at')
-        owner = live_response.get('owner', {})
-        uploader = owner.get('name')
-        uploader_id = owner.get('user_id')
-
         return {
             'id': video_id,
-            'title': title,
+            'title': self._og_search_title(webpage, default=None) or self._search_regex(
+                r'<title>\s*(.+?) - Mirrativ\s*</title>', webpage) or live_response.get('title'),
             'is_live': is_live,
-            'description': description,
+            'description': live_response.get('description'),
             'formats': formats,
-            'thumbnail': thumbnail,
+            'thumbnail': live_response.get('image_url'),
-            'uploader': uploader,
+            'uploader': traverse_obj(live_response, ('owner', 'name')),
-            'uploader_id': uploader_id,
+            'uploader_id': traverse_obj(live_response, ('owner', 'user_id')),
-            'duration': duration,
+            'duration': try_get(live_response, lambda x: x['ended_at'] - x['started_at']) if not is_live else None,
-            'view_count': view_count,
+            'view_count': live_response.get('total_viewer_num'),
-            'release_timestamp': release_timestamp,
+            'release_timestamp': live_response.get('started_at'),
-            'timestamp': timestamp,
+            'timestamp': live_response.get('created_at'),
-            'was_live': was_live,
+            'was_live': bool(live_response.get('is_archive')),
         }


 class MirrativUserIE(MirrativBaseIE):
     IE_NAME = 'mirrativ:user'
     _VALID_URL = r'https?://(?:www\.)?mirrativ\.com/user/(?P<id>\d+)'
-    LIVE_HISTORY_API_URL = 'https://www.mirrativ.com/api/live/live_history?user_id=%s&page=%d'
-    USER_INFO_API_URL = 'https://www.mirrativ.com/api/user/profile?user_id=%s'

     _TESTS = [{
         # Live archive is available up to 3 days
@@ -104,8 +93,8 @@ class MirrativUserIE(MirrativBaseIE):
         page = 1
         while page is not None:
             api_response = self._download_json(
-                self.LIVE_HISTORY_API_URL % (user_id, page), user_id,
-                note='Downloading page %d' % page)
+                f'https://www.mirrativ.com/api/live/live_history?user_id={user_id}&page={page}', user_id,
+                note=f'Downloading page {page}')
             self.assert_error(api_response)
             lives = api_response.get('lives')
             if not lives:
@@ -123,12 +112,10 @@ class MirrativUserIE(MirrativBaseIE):
     def _real_extract(self, url):
         user_id = self._match_id(url)
         user_info = self._download_json(
-            self.USER_INFO_API_URL % user_id, user_id,
+            f'https://www.mirrativ.com/api/user/profile?user_id={user_id}', user_id,
             note='Downloading user info', fatal=False)
         self.assert_error(user_info)

-        uploader = user_info.get('name')
-        description = user_info.get('description')
-
-        entries = self._entries(user_id)
-        return self.playlist_result(entries, user_id, uploader, description)
+        return self.playlist_result(
+            self._entries(user_id), user_id,
+            user_info.get('name'), user_info.get('description'))
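Two small yt-dlp utils do much of the work in the rewritten extractor: `dict_get` returns the value of the first listed key that holds a usable value, and `traverse_obj` walks nested dicts without raising. A quick illustration with a made-up API response:

    from yt_dlp.utils import dict_get, traverse_obj

    live_response = {
        'archive_url_hls': None,  # no archive for an ongoing stream
        'streaming_url_hls': 'https://example.invalid/live.m3u8',
        'owner': {'name': 'someone'},
    }

    dict_get(live_response, ('archive_url_hls', 'streaming_url_hls'))
    # -> 'https://example.invalid/live.m3u8' (first key with a usable value)
    traverse_obj(live_response, ('owner', 'name'))     # -> 'someone'
    traverse_obj(live_response, ('owner', 'user_id'))  # -> None, not a KeyError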

View File: yt_dlp/extractor/niconico.py

@@ -3,6 +3,7 @@ from __future__ import unicode_literals

 import datetime
 import itertools
+import functools
 import json
 import re

@@ -12,6 +13,7 @@ from ..compat import (
     compat_str,
     compat_parse_qs,
     compat_urllib_parse_urlparse,
+    compat_HTTPError,
 )

 from ..utils import (
     ExtractorError,
@@ -24,7 +26,9 @@ from ..utils import (
     PostProcessingError,
     remove_start,
     str_or_none,
+    traverse_obj,
     try_get,
+    unescapeHTML,
     unified_timestamp,
     urlencode_postdata,
     xpath_text,
@@ -606,8 +610,61 @@ class NiconicoIE(InfoExtractor):
         }


-class NiconicoPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/|my/)?mylist/(?P<id>\d+)'
+class NiconicoPlaylistBaseIE(InfoExtractor):
+    _PAGE_SIZE = 100
+
+    _API_HEADERS = {
+        'X-Frontend-ID': '6',
+        'X-Frontend-Version': '0',
+        'X-Niconico-Language': 'en-us'
+    }
+
+    def _call_api(self, list_id, resource, query):
+        "Implement this in child class"
+        pass
+
+    @staticmethod
+    def _parse_owner(item):
+        return {
+            'uploader': traverse_obj(item, ('owner', 'name')),
+            'uploader_id': traverse_obj(item, ('owner', 'id')),
+        }
+
+    def _fetch_page(self, list_id, page):
+        page += 1
+        resp = self._call_api(list_id, 'page %d' % page, {
+            'page': page,
+            'pageSize': self._PAGE_SIZE,
+        })
+        # this is needed to support both mylist and user
+        for video in traverse_obj(resp, ('items', ..., ('video', None))) or []:
+            video_id = video.get('id')
+            if not video_id:
+                # skip {"video": {"id": "blablabla", ...}}
+                continue
+            count = video.get('count') or {}
+            get_count = lambda x: int_or_none(count.get(x))
+            yield {
+                '_type': 'url',
+                'id': video_id,
+                'title': video.get('title'),
+                'url': f'https://www.nicovideo.jp/watch/{video_id}',
+                'description': video.get('shortDescription'),
+                'duration': int_or_none(video.get('duration')),
+                'view_count': get_count('view'),
+                'comment_count': get_count('comment'),
+                'thumbnail': traverse_obj(video, ('thumbnail', ('nHdUrl', 'largeUrl', 'listingUrl', 'url'))),
+                'ie_key': NiconicoIE.ie_key(),
+                **self._parse_owner(video),
+            }
+
+    def _entries(self, list_id):
+        return OnDemandPagedList(functools.partial(self._fetch_page, list_id), self._PAGE_SIZE)
+
+
+class NiconicoPlaylistIE(NiconicoPlaylistBaseIE):
+    IE_NAME = 'niconico:playlist'
+    _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/(?:user/\d+/)?(?:my/)?mylist/(?:#/)?(?P<id>\d+)'

     _TESTS = [{
         'url': 'http://www.nicovideo.jp/mylist/27411728',
@@ -618,48 +675,110 @@ class NiconicoPlaylistIE(InfoExtractor):
             'uploader': 'のっく',
             'uploader_id': '805442',
         },
-        'playlist_mincount': 225,
+        'playlist_mincount': 291,
     }, {
         'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
         'only_matching': True,
+    }, {
+        'url': 'https://www.nicovideo.jp/my/mylist/#/68048635',
+        'only_matching': True,
     }]

-    _API_HEADERS = {
-        'X-Frontend-ID': '6',
-        'X-Frontend-Version': '0'
-    }
+    def _call_api(self, list_id, resource, query):
+        return self._download_json(
+            f'https://nvapi.nicovideo.jp/v2/mylists/{list_id}', list_id,
+            f'Downloading {resource}', query=query,
+            headers=self._API_HEADERS)['data']['mylist']

     def _real_extract(self, url):
         list_id = self._match_id(url)
-
-        def get_page_data(pagenum, pagesize):
-            return self._download_json(
-                'http://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
-                query={'page': 1 + pagenum, 'pageSize': pagesize},
-                headers=self._API_HEADERS).get('data').get('mylist')
-
-        data = get_page_data(0, 1)
-        title = data.get('name')
-        description = data.get('description')
-        uploader = data.get('owner').get('name')
-        uploader_id = data.get('owner').get('id')
-
-        def pagefunc(pagenum):
-            data = get_page_data(pagenum, 25)
-            return ({
-                '_type': 'url',
-                'url': 'http://www.nicovideo.jp/watch/' + item.get('watchId'),
-            } for item in data.get('items'))
-
-        return {
-            '_type': 'playlist',
-            'id': list_id,
-            'title': title,
-            'description': description,
-            'uploader': uploader,
-            'uploader_id': uploader_id,
-            'entries': OnDemandPagedList(pagefunc, 25),
-        }
+        mylist = self._call_api(list_id, 'list', {
+            'pageSize': 1,
+        })
+        return self.playlist_result(
+            self._entries(list_id), list_id,
+            mylist.get('name'), mylist.get('description'), **self._parse_owner(mylist))
+
+
+class NiconicoSeriesIE(InfoExtractor):
+    IE_NAME = 'niconico:series'
+    _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/series/(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'https://www.nicovideo.jp/series/110226',
+        'info_dict': {
+            'id': '110226',
+            'title': 'ご立派ァ!のシリーズ',
+        },
+        'playlist_mincount': 10,  # as of 2021/03/17
+    }, {
+        'url': 'https://www.nicovideo.jp/series/12312/',
+        'info_dict': {
+            'id': '12312',
+            'title': 'バトルスピリッツ お勧めカード紹介(調整中)',
+        },
+        'playlist_mincount': 97,  # as of 2021/03/17
+    }, {
+        'url': 'https://nico.ms/series/203559',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        list_id = self._match_id(url)
+        webpage = self._download_webpage(f'https://www.nicovideo.jp/series/{list_id}', list_id)
+
+        title = self._search_regex(
+            (r'<title>「(.+)(全',
+             r'<div class="TwitterShareButton"\s+data-text="(.+)\s+https:'),
+            webpage, 'title', fatal=False)
+        if title:
+            title = unescapeHTML(title)
+        playlist = [
+            self.url_result(f'https://www.nicovideo.jp/watch/{v_id}', video_id=v_id)
+            for v_id in re.findall(r'href="/watch/([a-z0-9]+)" data-href="/watch/\1', webpage)]
+        return self.playlist_result(playlist, list_id, title)
+
+
+class NiconicoHistoryIE(NiconicoPlaylistBaseIE):
+    IE_NAME = 'niconico:history'
+    IE_DESC = 'NicoNico user history. Requires cookies.'
+    _VALID_URL = r'https?://(?:www\.|sp\.)?nicovideo\.jp/my/history'
+
+    _TESTS = [{
+        'note': 'PC page, with /video',
+        'url': 'https://www.nicovideo.jp/my/history/video',
+        'only_matching': True,
+    }, {
+        'note': 'PC page, without /video',
+        'url': 'https://www.nicovideo.jp/my/history',
+        'only_matching': True,
+    }, {
+        'note': 'mobile page, with /video',
+        'url': 'https://sp.nicovideo.jp/my/history/video',
+        'only_matching': True,
+    }, {
+        'note': 'mobile page, without /video',
+        'url': 'https://sp.nicovideo.jp/my/history',
+        'only_matching': True,
+    }]
+
+    def _call_api(self, list_id, resource, query):
+        return self._download_json(
+            'https://nvapi.nicovideo.jp/v1/users/me/watch/history', 'history',
+            f'Downloading {resource}', query=query,
+            headers=self._API_HEADERS)['data']
+
+    def _real_extract(self, url):
+        list_id = 'history'
+        try:
+            mylist = self._call_api(list_id, 'list', {
+                'pageSize': 1,
+            })
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                self.raise_login_required('You have to be logged in to get your watch history')
+            raise
+        return self.playlist_result(self._entries(list_id), list_id, **self._parse_owner(mylist))


 class NicovideoSearchBaseIE(InfoExtractor):
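The two inline comments in `_fetch_page` ("this is needed to support both mylist and user", "skip {"video": ...}") are worth unpacking: mylist API items wrap each video dict in a `video` key, while user-page items are the video dicts themselves. In `traverse_obj`, `...` branches over every item, and the alternate-key tuple `('video', None)` tries `video` and then `None` (which yields the object itself), so an item can surface both as its inner `video` dict and as the raw wrapper; the `if not video_id: continue` guard then drops wrappers, which have no top-level `id`. A rough illustration with invented data (result shape per the 2022-era `traverse_obj`):

    from yt_dlp.utils import traverse_obj

    mylist_resp = {'items': [{'video': {'id': 'sm123', 'title': 'a'}}]}
    user_resp = {'items': [{'id': 'sm456', 'title': 'b'}]}

    # mylist: the inner dict and its wrapper both come back;
    # the wrapper has no 'id' and is skipped by the loop above
    traverse_obj(mylist_resp, ('items', ..., ('video', None)))
    # -> [{'id': 'sm123', 'title': 'a'}, {'video': {'id': 'sm123', 'title': 'a'}}]

    # user: no 'video' key, so only the item itself survives
    traverse_obj(user_resp, ('items', ..., ('video', None)))
    # -> [{'id': 'sm456', 'title': 'b'}]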

View File: yt_dlp/extractor/rokfin.py

@@ -1,11 +1,11 @@
 # coding: utf-8
 import itertools
 from datetime import datetime

 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
+    ExtractorError,
     float_or_none,
     format_field,
     int_or_none,
@@ -16,6 +16,9 @@ from ..utils import (
 )

+_API_BASE_URL = 'https://prod-api-v2.production.rokfin.com/api/v2/public/'
+

 class RokfinIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?rokfin\.com/(?P<id>(?P<type>post|stream)/\d+)'
     _TESTS = [{
@@ -82,8 +85,7 @@ class RokfinIE(InfoExtractor):
     def _real_extract(self, url):
         video_id, video_type = self._match_valid_url(url).group('id', 'type')

-        metadata = self._download_json(f'https://prod-api-v2.production.rokfin.com/api/v2/public/{video_id}',
-                                       video_id, fatal=False) or {}
+        metadata = self._download_json(f'{_API_BASE_URL}{video_id}', video_id)

         scheduled = unified_timestamp(metadata.get('scheduledAt'))
         live_status = ('was_live' if metadata.get('stoppedAt')
@@ -137,7 +139,7 @@ class RokfinIE(InfoExtractor):
         pages_total = None
         for page_n in itertools.count():
             raw_comments = self._download_json(
-                f'https://prod-api-v2.production.rokfin.com/api/v2/public/comment?postId={video_id[5:]}&page={page_n}&size=50',
+                f'{_API_BASE_URL}comment?postId={video_id[5:]}&page={page_n}&size=50',
                 video_id, note=f'Downloading viewer comments page {page_n + 1}{format_field(pages_total, template=" of %s")}',
                 fatal=False) or {}
@@ -153,6 +155,102 @@ class RokfinIE(InfoExtractor):
                     'timestamp': unified_timestamp(comment.get('postedAt'))
                 }

-            pages_total = int_or_none(raw_comments.get('totalPages'))
-            if not raw_comments.get('content') or raw_comments.get('last') is not False or page_n > (pages_total or 0):
+            pages_total = int_or_none(raw_comments.get('totalPages')) or None
+            is_last = raw_comments.get('last')
+            if not raw_comments.get('content') or is_last or (page_n > pages_total if pages_total else is_last is not False):
                 return
+
+
+class RokfinPlaylistBaseIE(InfoExtractor):
+    _TYPES = {
+        'video': 'post',
+        'audio': 'post',
+        'stream': 'stream',
+        'dead_stream': 'stream',
+        'stack': 'stack',
+    }
+
+    def _get_video_data(self, metadata):
+        for content in metadata.get('content') or []:
+            media_type = self._TYPES.get(content.get('mediaType'))
+            video_id = content.get('id') if media_type == 'post' else content.get('mediaId')
+            if not media_type or not video_id:
+                continue
+
+            yield self.url_result(f'https://rokfin.com/{media_type}/{video_id}', video_id=f'{media_type}/{video_id}',
+                                  video_title=str_or_none(traverse_obj(content, ('content', 'contentTitle'))))
+
+
+class RokfinStackIE(RokfinPlaylistBaseIE):
+    IE_NAME = 'rokfin:stack'
+    _VALID_URL = r'https?://(?:www\.)?rokfin\.com/stack/(?P<id>[^/]+)'
+    _TESTS = [{
+        'url': 'https://www.rokfin.com/stack/271/Tulsi-Gabbard-Portsmouth-Townhall-FULL--Feb-9-2020',
+        'playlist_count': 8,
+        'info_dict': {
+            'id': '271',
+        },
+    }]
+
+    def _real_extract(self, url):
+        list_id = self._match_id(url)
+        return self.playlist_result(self._get_video_data(
+            self._download_json(f'{_API_BASE_URL}stack/{list_id}', list_id)), list_id)
+
+
+class RokfinChannelIE(RokfinPlaylistBaseIE):
+    IE_NAME = 'rokfin:channel'
+    _VALID_URL = r'https?://(?:www\.)?rokfin\.com/(?!((feed/?)|(discover/?)|(channels/?))$)(?P<id>[^/]+)/?$'
+    _TESTS = [{
+        'url': 'https://rokfin.com/TheConvoCouch',
+        'playlist_mincount': 100,
+        'info_dict': {
+            'id': '12071-new',
+            'title': 'TheConvoCouch - New',
+            'description': 'md5:bb622b1bca100209b91cd685f7847f06',
+        },
+    }]
+
+    _TABS = {
+        'new': 'posts',
+        'top': 'top',
+        'videos': 'video',
+        'podcasts': 'audio',
+        'streams': 'stream',
+        'stacks': 'stack',
+    }
+
+    def _real_initialize(self):
+        self._validate_extractor_args()
+
+    def _validate_extractor_args(self):
+        requested_tabs = self._configuration_arg('tab', None)
+        if requested_tabs is not None and (len(requested_tabs) > 1 or requested_tabs[0] not in self._TABS):
+            raise ExtractorError(f'Invalid extractor-arg "tab". Must be one of {", ".join(self._TABS)}', expected=True)
+
+    def _entries(self, channel_id, channel_name, tab):
+        pages_total = None
+        for page_n in itertools.count(0):
+            if tab in ('posts', 'top'):
+                data_url = f'{_API_BASE_URL}user/{channel_name}/{tab}?page={page_n}&size=50'
+            else:
+                data_url = f'{_API_BASE_URL}post/search/{tab}?page={page_n}&size=50&creator={channel_id}'
+            metadata = self._download_json(
+                data_url, channel_name,
+                note=f'Downloading video metadata page {page_n + 1}{format_field(pages_total, template=" of %s")}')
+
+            yield from self._get_video_data(metadata)
+            pages_total = int_or_none(metadata.get('totalPages')) or None
+            is_last = metadata.get('last')
+            if is_last or (page_n > pages_total if pages_total else is_last is not False):
+                return
+
+    def _real_extract(self, url):
+        channel_name = self._match_id(url)
+        channel_info = self._download_json(f'{_API_BASE_URL}user/{channel_name}', channel_name)
+        channel_id = channel_info['id']
+        tab = self._configuration_arg('tab', default=['new'])[0]
+
+        return self.playlist_result(
+            self._entries(channel_id, channel_name, self._TABS[tab]),
+            f'{channel_id}-{tab}', f'{channel_name} - {tab.title()}', str_or_none(channel_info.get('description')))
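Both paginated loops in this file now share the same defensive exit test. Unrolled: stop when the API marks the last page; if it reported `totalPages`, also stop once the counter runs past it; and if it reported neither signal, stop immediately rather than loop forever (the comments loop additionally stops on an empty `content`). A standalone restatement of that test (hypothetical helper, not part of the diff):

    def should_stop(page_n, pages_total, is_last):
        """Mirror of the loop-exit test in RokfinIE/RokfinChannelIE."""
        if pages_total:                 # totalPages known: trust the counter
            return is_last or page_n > pages_total
        return is_last is not False     # unknown: only a literal False keeps going

    assert should_stop(0, None, True)    # API flagged the last page
    assert should_stop(5, 3, False)      # ran past the reported total
    assert not should_stop(0, 3, False)  # more pages to fetch
    assert should_stop(0, None, None)    # no signals at all: bail out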

View File: yt_dlp/extractor/threeqsdn.py

@@ -111,8 +111,7 @@ class ThreeQSDNIE(InfoExtractor):
                 subtitles = self._merge_subtitles(subtitles, subs)
             elif source_type == 'hls':
                 fmts, subs = self._extract_m3u8_formats_and_subtitles(
-                    source, video_id, 'mp4', 'm3u8' if live else 'm3u8_native',
-                    m3u8_id='hls', fatal=False)
+                    source, video_id, 'mp4', live=live, m3u8_id='hls', fatal=False)
                 formats.extend(fmts)
                 subtitles = self._merge_subtitles(subtitles, subs)
             elif source_type == 'progressive':

View File: yt_dlp/extractor/tv2.py

@@ -81,9 +81,7 @@ class TV2IE(InfoExtractor):
             elif ext == 'm3u8':
                 if not data.get('drmProtected'):
                     formats.extend(self._extract_m3u8_formats(
-                        video_url, video_id, 'mp4',
-                        'm3u8' if is_live else 'm3u8_native',
-                        m3u8_id=format_id, fatal=False))
+                        video_url, video_id, 'mp4', live=is_live, m3u8_id=format_id, fatal=False))
             elif ext == 'mpd':
                 formats.extend(self._extract_mpd_formats(
                     video_url, video_id, format_id, fatal=False))
@@ -244,9 +242,7 @@ class KatsomoIE(InfoExtractor):
             elif ext == 'm3u8':
                 if not data.get('drmProtected'):
                     formats.extend(self._extract_m3u8_formats(
-                        video_url, video_id, 'mp4',
-                        'm3u8' if is_live else 'm3u8_native',
-                        m3u8_id=format_id, fatal=False))
+                        video_url, video_id, 'mp4', live=is_live, m3u8_id=format_id, fatal=False))
             elif ext == 'mpd':
                 formats.extend(self._extract_mpd_formats(
                     video_url, video_id, format_id, fatal=False))

View File: yt_dlp/extractor/tvnet.py

@@ -111,9 +111,7 @@ class TVNetIE(InfoExtractor):
                     continue
                 stream_urls.add(stream_url)
                 formats.extend(self._extract_m3u8_formats(
-                    stream_url, video_id, 'mp4',
-                    entry_protocol='m3u8' if is_live else 'm3u8_native',
-                    m3u8_id='hls', fatal=False))
+                    stream_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False))
         self._sort_formats(formats)

         # better support for radio streams

View File: yt_dlp/extractor/vgtv.py

@@ -195,9 +195,7 @@ class VGTVIE(XstreamIE):
         hls_url = streams.get('hls')
         if hls_url:
             formats.extend(self._extract_m3u8_formats(
-                hls_url, video_id, 'mp4',
-                entry_protocol='m3u8' if is_live else 'm3u8_native',
-                m3u8_id='hls', fatal=False))
+                hls_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False))

         hds_url = streams.get('hds')
         if hds_url:

View File: yt_dlp/extractor/vimeo.py

@@ -166,8 +166,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
         for f_id, m_url in sep_manifest_urls:
             if files_type == 'hls':
                 fmts, subs = self._extract_m3u8_formats_and_subtitles(
-                    m_url, video_id, 'mp4',
-                    'm3u8' if is_live else 'm3u8_native', m3u8_id=f_id,
+                    m_url, video_id, 'mp4', live=is_live, m3u8_id=f_id,
                     note='Downloading %s m3u8 information' % cdn_name,
                     fatal=False)
                 formats.extend(fmts)

View File: yt_dlp/utils.py

@@ -2799,13 +2799,14 @@ class PagedList:
     def __init__(self, pagefunc, pagesize, use_cache=True):
         self._pagefunc = pagefunc
         self._pagesize = pagesize
+        self._pagecount = float('inf')
         self._use_cache = use_cache
         self._cache = {}

     def getpage(self, pagenum):
         page_results = self._cache.get(pagenum)
         if page_results is None:
-            page_results = list(self._pagefunc(pagenum))
+            page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
             if self._use_cache:
                 self._cache[pagenum] = page_results
         return page_results
@@ -2817,7 +2818,7 @@ class PagedList:
         raise NotImplementedError('This method must be implemented by subclasses')

     def __getitem__(self, idx):
-        # NOTE: cache must be enabled if this is used
+        assert self._use_cache, 'Indexing PagedList requires cache'
         if not isinstance(idx, int) or idx < 0:
             raise TypeError('indices must be non-negative integers')
         entries = self.getslice(idx, idx + 1)
@@ -2843,7 +2844,11 @@ class OnDemandPagedList(PagedList):
                 if (end is not None and firstid <= end <= nextfirstid)
                 else None)

-            page_results = self.getpage(pagenum)
+            try:
+                page_results = self.getpage(pagenum)
+            except Exception:
+                self._pagecount = pagenum - 1
+                raise
             if startv != 0 or endv is not None:
                 page_results = page_results[startv:endv]
             yield from page_results
@@ -2863,8 +2868,8 @@ class InAdvancePagedList(PagedList):

 class InAdvancePagedList(PagedList):
     def __init__(self, pagefunc, pagecount, pagesize):
-        self._pagecount = pagecount
         PagedList.__init__(self, pagefunc, pagesize, True)
+        self._pagecount = pagecount

     def _getslice(self, start, end):
         start_page = start // self._pagesize
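Tying this back to the first commit in the list: once a page function raises, the generator in `_getslice` records the last good page in `_pagecount`, and `getpage()` then short-circuits to `[]` for anything beyond it instead of re-invoking the page function. A minimal sketch of the resulting behaviour (the page function and sizes are invented):

    from yt_dlp.utils import OnDemandPagedList

    calls = []

    def fetch_page(pagenum):
        calls.append(pagenum)
        if pagenum >= 2:
            raise IOError('simulated network error on page 2')
        yield from (f'page{pagenum}-item{i}' for i in range(5))

    pages = OnDemandPagedList(fetch_page, 5)
    try:
        pages.getslice(0, 15)          # pages 0 and 1 succeed, page 2 raises
    except IOError:
        pass
    print(pages.getslice(10, 15))      # [] -- page 2 is now past _pagecount
    print(calls)                       # [0, 1, 2]: page 2 is not fetched again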