# yt-dlp/yt_dlp/extractor/pialive.py

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    clean_html,
    extract_attributes,
    get_element_by_class,
    get_element_html_by_class,
    multipart_encode,
    unified_timestamp,
    url_or_none,
)
from ..utils.traversal import traverse_obj


class PiaLiveIE(InfoExtractor):
    """Extractor for player.pia-live.jp stream pages.

    The watch page exposes a program code and an article code as JavaScript
    variables. Those are used to query the PIA LIVE API for the embedded
    player tag (whose ``src`` is handed to another extractor through a
    transparent URL result) and, when comments are requested, for the chat
    tag whose page embeds the comment history.
    """

    # Sent as the Referer header on API and comment-page requests.
    PLAYER_ROOT_URL = 'https://player.pia-live.jp/'
    # Base URL of the tag-list endpoints used below.
    PIA_LIVE_API_URL = 'https://api.pia-live.jp'
    # Posted as the ``api_key`` form field on every API request.
    API_KEY = 'kfds)FKFps-dms9e'
    _VALID_URL = r'https?://player\.pia-live\.jp/stream/(?P<id>[\w-]+)'

    _TESTS = [
        {
            'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krUDqGOwN4d61dCWQYOd6CTxl4hjya9dsfEZGsM4uGOUdax60lEI4twsXGXf7crmz8Gk__GhupTrWxA7RFRVt76',
            'info_dict': {
                'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
                'display_id': '2431867_001',
                'title': 'こながめでたい日２０２４の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
                'live_status': 'was_live',
                'comment_count': int,
            },
            'params': {
                'getcomments': True,
                'skip_download': True,
                'ignore_no_formats_error': True,
            },
            'skip': 'The video is no longer available',
        },
        {
            'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krJdu0GVBVbVy01IwpJ6J3qBEm3d9TCTt1d0eWpsZGj7DrOjVOmS7GAWGwyscMgiThopJvzgWC4H5b-7XQjAfRZ',
            'info_dict': {
                'id': '9ce8b8ba-f6d1-4d1f-83a0-18c3148ded93',
                'display_id': '2431867_002',
                'title': 'こながめでたい日２０２４の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
                'live_status': 'was_live',
                'comment_count': int,
            },
            'params': {
                'getcomments': True,
                'skip_download': True,
                'ignore_no_formats_error': True,
            },
            'skip': 'The video is no longer available',
        },
    ]

    def _extract_vars(self, variable, html):
        """Return the quoted string assigned to a JavaScript variable in *html*.

        Matches a ``var``/``const``/``let`` declaration of *variable* whose
        value is a single- or double-quoted, non-empty string, and returns
        the text between the quotes. No ``fatal``/``default`` override is
        passed, so a missing declaration is handled by ``_search_regex``'s
        default (fatal) behaviour.
        """
        return self._search_regex(
            rf'(?:var|const|let)\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
            html, f'variable {variable}', group='value')

    def _real_extract(self, url):
        """Resolve a stream page to its embedded player URL.

        Raises ``ExtractorError`` (expected) when the page carries the
        ``play-end`` element. When the page shows a parsable start date, an
        ``is_upcoming`` info dict with ``release_timestamp`` is returned
        instead of formats. Otherwise the player tag is fetched from the API
        and its ``src`` is returned as a transparent URL result, with comment
        extraction attached as a post-extractor.
        """
        video_key = self._match_id(url)
        webpage = self._download_webpage(url, video_key)

        program_code = self._extract_vars('programCode', webpage)
        article_code = self._extract_vars('articleCode', webpage)
        title = self._html_extract_title(webpage)

        if get_element_html_by_class('play-end', webpage):
            raise ExtractorError('The video is no longer available', expected=True, video_id=program_code)

        if start_info := clean_html(get_element_by_class('play-waiting__date', webpage)):
            # With fatal=False a failed match yields None rather than a
            # tuple; fall back to a (None, None) pair so the unpacking below
            # cannot raise TypeError and the check that follows decides.
            date, time = self._search_regex(
                r'(?P<date>\d{4}/\d{1,2}/\d{1,2})\([月火水木金土日]\)(?P<time>\d{2}:\d{2})',
                start_info, 'start_info', fatal=False, group=('date', 'time')) or (None, None)
            if all((date, time)):
                # The "+09:00" offset is appended before parsing.
                release_timestamp_str = f'{date} {time} +09:00'
                release_timestamp = unified_timestamp(release_timestamp_str)
                self.raise_no_formats(f'The video will be available after {release_timestamp_str}', expected=True)
                return {
                    'id': program_code,
                    'title': title,
                    'live_status': 'is_upcoming',
                    'release_timestamp': release_timestamp,
                }

        # Both API endpoints take the same multipart form and headers.
        payload, content_type = multipart_encode({
            'play_url': video_key,
            'api_key': self.API_KEY,
        })
        api_kwargs = {
            'video_id': program_code,
            'data': payload,
            'headers': {'Content-Type': content_type, 'Referer': self.PLAYER_ROOT_URL},
        }

        player_tag_list = self._download_json(
            f'{self.PIA_LIVE_API_URL}/perf/player-tag-list/{program_code}', **api_kwargs,
            note='Fetching player tag list', errnote='Unable to fetch player tag list')

        # Only look up the chat room when comments were requested; the lookup
        # is non-fatal, so a failure simply leaves chat_room_url as None.
        chat_room_url = None
        if self.get_param('getcomments'):
            chat_room_url = traverse_obj(self._download_json(
                f'{self.PIA_LIVE_API_URL}/perf/chat-tag-list/{program_code}/{article_code}', **api_kwargs,
                note='Fetching chat info', errnote='Unable to fetch chat info', fatal=False),
                ('data', 'chat_one_tag', {extract_attributes}, 'src', {url_or_none}))

        return self.url_result(
            extract_attributes(player_tag_list['data']['movie_one_tag'])['src'], url_transparent=True,
            video_title=title, display_id=program_code, __post_extractor=self.extract_comments(
                program_code, chat_room_url))

    def _get_comments(self, video_id, chat_room_url):
        """Yield comment dicts parsed from the chat room page.

        Yields nothing when *chat_room_url* is falsy or the page cannot be
        downloaded. Comments are read from the array assigned to the
        ``_history`` JavaScript variable; each entry is mapped by position:
        0 -> timestamp, 1 -> author_is_uploader (true when the value is 2),
        2 -> author, 3 -> text, 4 -> id.
        """
        if not chat_room_url:
            return
        if comment_page := self._download_webpage(
                chat_room_url, video_id, headers={'Referer': self.PLAYER_ROOT_URL},
                note='Fetching comment page', errnote='Unable to fetch comment page', fatal=False):
            yield from traverse_obj(self._search_json(
                r'var\s+_history\s*=', comment_page, 'comment list',
                video_id, contains_pattern=r'\[(?s:.+)\]', fatal=False), (..., {
                    'timestamp': 0,
                    'author_is_uploader': (1, {lambda x: x == 2}),
                    'author': 2,
                    'text': 3,
                    'id': 4,
                }))
feat: add support for uliza in pia-live 2024-08-21 03:43:45 +00:00			`from .common import InfoExtractor`
[ie/pialive] Follow your steps (#1) * [ie/pialive] Support detecting upcoming and ended live events * Pack API arguments * fix UnboundLocalError for "chat_room_url" * extract video_id from query string by "parse_qs()" * Fix tests 2024-09-25 06:47:15 +00:00			`from ..utils import (`
			`ExtractorError,`
			`clean_html,`
			`extract_attributes,`
			`get_element_by_class,`
			`get_element_html_by_class,`
			`multipart_encode,`
			`unified_timestamp,`
			`url_or_none,`
			`)`
fix: code style 2024-08-27 08:35:10 +00:00			`from ..utils.traversal import traverse_obj`
feat: add support for uliza in pia-live 2024-08-21 03:43:45 +00:00

			`class PiaLiveIE(InfoExtractor):`
chore: remove smuggled_url 2024-08-23 15:12:18 +00:00			`PLAYER_ROOT_URL = 'https://player.pia-live.jp/'`
feat: add support for uliza in pia-live 2024-08-21 03:43:45 +00:00			`PIA_LIVE_API_URL = 'https://api.pia-live.jp'`
fix: error 2024-09-09 13:27:29 +00:00			`API_KEY = 'kfds)FKFps-dms9e'`
feat: add support for uliza in pia-live 2024-08-21 03:43:45 +00:00			`_VALID_URL = r'https?://player\.pia-live\.jp/stream/(?P<id>[\w-]+)'`

chore: use url instead of Request and add tests 2024-08-22 03:59:32 +00:00			`_TESTS = [`
			`{`
			`'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krUDqGOwN4d61dCWQYOd6CTxl4hjya9dsfEZGsM4uGOUdax60lEI4twsXGXf7crmz8Gk__GhupTrWxA7RFRVt76',`
			`'info_dict': {`
chore: correct tests 2024-08-24 09:44:14 +00:00			`'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',`
			`'display_id': '2431867_001',`
chore: use url instead of Request and add tests 2024-08-22 03:59:32 +00:00			`'title': 'こながめでたい日２０２４の視聴ページ \| PIA LIVE STREAM(ぴあライブストリーム)',`
			`'live_status': 'was_live',`
feat: use extract_comments instead 2024-08-25 15:26:37 +00:00			`'comment_count': int,`
chore: use url instead of Request and add tests 2024-08-22 03:59:32 +00:00			`},`
			`'params': {`
feat: use extract_comments instead 2024-08-25 15:26:37 +00:00			`'getcomments': True,`
chore: use url instead of Request and add tests 2024-08-22 03:59:32 +00:00			`'skip_download': True,`
			`'ignore_no_formats_error': True,`
			`},`
[ie/pialive] Follow your steps (#1) * [ie/pialive] Support detecting upcoming and ended live events * Pack API arguments * fix UnboundLocalError for "chat_room_url" * extract video_id from query string by "parse_qs()" * Fix tests 2024-09-25 06:47:15 +00:00			`'skip': 'The video is no longer available',`
chore: use url instead of Request and add tests 2024-08-22 03:59:32 +00:00			`},`
			`{`
			`'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krJdu0GVBVbVy01IwpJ6J3qBEm3d9TCTt1d0eWpsZGj7DrOjVOmS7GAWGwyscMgiThopJvzgWC4H5b-7XQjAfRZ',`
			`'info_dict': {`
chore: correct tests 2024-08-24 09:44:14 +00:00			`'id': '9ce8b8ba-f6d1-4d1f-83a0-18c3148ded93',`
			`'display_id': '2431867_002',`
chore: use url instead of Request and add tests 2024-08-22 03:59:32 +00:00			`'title': 'こながめでたい日２０２４の視聴ページ \| PIA LIVE STREAM(ぴあライブストリーム)',`
			`'live_status': 'was_live',`
feat: use extract_comments instead 2024-08-25 15:26:37 +00:00			`'comment_count': int,`
chore: use url instead of Request and add tests 2024-08-22 03:59:32 +00:00			`},`
			`'params': {`
feat: use extract_comments instead 2024-08-25 15:26:37 +00:00			`'getcomments': True,`
chore: use url instead of Request and add tests 2024-08-22 03:59:32 +00:00			`'skip_download': True,`
			`'ignore_no_formats_error': True,`
			`},`
[ie/pialive] Follow your steps (#1) * [ie/pialive] Support detecting upcoming and ended live events * Pack API arguments * fix UnboundLocalError for "chat_room_url" * extract video_id from query string by "parse_qs()" * Fix tests 2024-09-25 06:47:15 +00:00			`'skip': 'The video is no longer available',`
chore: use url instead of Request and add tests 2024-08-22 03:59:32 +00:00			`},`
			`]`

feat: use extract_comments instead 2024-08-25 15:26:37 +00:00			`def _extract_vars(self, variable, html):`
			`return self._search_regex(`
Apply suggestions from code review Co-authored-by: sepro <sepro@sepr0.com> 2024-09-09 13:13:03 +00:00			`rf'(?:var\|const\|let)\s+{variable}\s=\s(["\'])(?P<value>(?:(?!\1).)+)\1',`
			`html, f'variable {variable}', group='value')`
feat: use extract_comments instead 2024-08-25 15:26:37 +00:00
feat: add support for uliza in pia-live 2024-08-21 03:43:45 +00:00			`def _real_extract(self, url):`
			`video_key = self._match_id(url)`
			`webpage = self._download_webpage(url, video_key)`
feat: use extract_comments instead 2024-08-25 15:26:37 +00:00
			`program_code = self._extract_vars('programCode', webpage)`
			`article_code = self._extract_vars('articleCode', webpage)`
[ie/pialive] Follow your steps (#1) * [ie/pialive] Support detecting upcoming and ended live events * Pack API arguments * fix UnboundLocalError for "chat_room_url" * extract video_id from query string by "parse_qs()" * Fix tests 2024-09-25 06:47:15 +00:00			`title = self._html_extract_title(webpage)`

			`if get_element_html_by_class('play-end', webpage):`
			`raise ExtractorError('The video is no longer available', expected=True, video_id=program_code)`

			`if start_info := clean_html(get_element_by_class('play-waiting__date', webpage)):`
			`date, time = self._search_regex(`
			`r'(?P<date>\d{4}/\d{1,2}/\d{1,2})\([月火水木金土日]\)(?P<time>\d{2}:\d{2})',`
			`start_info, 'start_info', fatal=False, group=('date', 'time'))`
			`if all((date, time)):`
			`release_timestamp_str = f'{date} {time} +09:00'`
			`release_timestamp = unified_timestamp(release_timestamp_str)`
			`self.raise_no_formats(f'The video will be available after {release_timestamp_str}', expected=True)`
			`return {`
			`'id': program_code,`
			`'title': title,`
			`'live_status': 'is_upcoming',`
			`'release_timestamp': release_timestamp,`
			`}`
feat: add support for uliza in pia-live 2024-08-21 03:43:45 +00:00
			`payload, content_type = multipart_encode({`
			`'play_url': video_key,`
Apply suggestions from code review Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com> 2024-09-11 01:15:34 +00:00			`'api_key': self.API_KEY,`
			`})`
[ie/pialive] Follow your steps (#1) * [ie/pialive] Support detecting upcoming and ended live events * Pack API arguments * fix UnboundLocalError for "chat_room_url" * extract video_id from query string by "parse_qs()" * Fix tests 2024-09-25 06:47:15 +00:00			`api_kwargs = {`
			`'video_id': program_code,`
			`'data': payload,`
			`'headers': {'Content-Type': content_type, 'Referer': self.PLAYER_ROOT_URL},`
			`}`
feat: use extract_comments instead 2024-08-25 15:26:37 +00:00
feat: add support for uliza in pia-live 2024-08-21 03:43:45 +00:00			`player_tag_list = self._download_json(`
[ie/pialive] Follow your steps (#1) * [ie/pialive] Support detecting upcoming and ended live events * Pack API arguments * fix UnboundLocalError for "chat_room_url" * extract video_id from query string by "parse_qs()" * Fix tests 2024-09-25 06:47:15 +00:00			`f'{self.PIA_LIVE_API_URL}/perf/player-tag-list/{program_code}', **api_kwargs,`
feat: use extract_comments instead 2024-08-25 15:26:37 +00:00			`note='Fetching player tag list', errnote='Unable to fetch player tag list')`
fix: error 2024-09-09 13:27:29 +00:00			`if self.get_param('getcomments'):`
			`chat_room_url = traverse_obj(self._download_json(`
[ie/pialive] Follow your steps (#1) * [ie/pialive] Support detecting upcoming and ended live events * Pack API arguments * fix UnboundLocalError for "chat_room_url" * extract video_id from query string by "parse_qs()" * Fix tests 2024-09-25 06:47:15 +00:00			`f'{self.PIA_LIVE_API_URL}/perf/chat-tag-list/{program_code}/{article_code}', **api_kwargs,`
fix: error 2024-09-09 13:27:29 +00:00			`note='Fetching chat info', errnote='Unable to fetch chat info', fatal=False),`
			`('data', 'chat_one_tag', {extract_attributes}, 'src', {url_or_none}))`
[ie/pialive] Follow your steps (#1) * [ie/pialive] Support detecting upcoming and ended live events * Pack API arguments * fix UnboundLocalError for "chat_room_url" * extract video_id from query string by "parse_qs()" * Fix tests 2024-09-25 06:47:15 +00:00			`else:`
			`chat_room_url = None`
feat: fetch comments 2024-08-22 15:06:16 +00:00
fix: remove unnecessary code 2024-08-24 15:34:06 +00:00			`return self.url_result(`
feat: use extract_comments instead 2024-08-25 15:26:37 +00:00			`extract_attributes(player_tag_list['data']['movie_one_tag'])['src'], url_transparent=True,`
[ie/pialive] Follow your steps (#1) * [ie/pialive] Support detecting upcoming and ended live events * Pack API arguments * fix UnboundLocalError for "chat_room_url" * extract video_id from query string by "parse_qs()" * Fix tests 2024-09-25 06:47:15 +00:00			`video_title=title, display_id=program_code, __post_extractor=self.extract_comments(`
			`program_code, chat_room_url))`
feat: use extract_comments instead 2024-08-25 15:26:37 +00:00
			`def _get_comments(self, video_id, chat_room_url):`
			`if not chat_room_url:`
			`return`
			`if comment_page := self._download_webpage(`
			`chat_room_url, video_id, headers={'Referer': f'{self.PLAYER_ROOT_URL}'},`
			`note='Fetching comment page', errnote='Unable to fetch comment page', fatal=False):`
			`yield from traverse_obj(self._search_json(`
			`r'var\s+_history\s*=', comment_page, 'comment list',`
			`video_id, contains_pattern=r'\[(?s:.+)\]', fatal=False), (..., {`
fix: code style 2024-08-27 08:35:10 +00:00			`'timestamp': 0,`
feat: use extract_comments instead 2024-08-25 15:26:37 +00:00			`'author_is_uploader': (1, {lambda x: x == 2}),`
fix: code style 2024-08-27 08:35:10 +00:00			`'author': 2,`
			`'text': 3,`
			`'id': 4,`
feat: use extract_comments instead 2024-08-25 15:26:37 +00:00			`}))`