# coding: utf-8 from __future__ import unicode_literals import re from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( float_or_none, month_by_abbreviation, ExtractorError, ) class YamIE(InfoExtractor): _VALID_URL = r'http://mymedia.yam.com/m/(?P\d+)' _TESTS = [{ # An audio hosted on Yam 'url': 'http://mymedia.yam.com/m/2283921', 'md5': 'c011b8e262a52d5473d9c2e3c9963b9c', 'info_dict': { 'id': '2283921', 'ext': 'mp3', 'title': '發現 - 趙薇 京華煙雲主題曲', 'uploader_id': 'princekt', 'upload_date': '20080807', 'duration': 313.0, } }, { # An external video hosted on YouTube 'url': 'http://mymedia.yam.com/m/3599430', 'md5': '03127cf10d8f35d120a9e8e52e3b17c6', 'info_dict': { 'id': 'CNpEoQlrIgA', 'ext': 'mp4', 'upload_date': '20150306', 'uploader': '新莊社大瑜伽社', 'description': 'md5:11e2e405311633ace874f2e6226c8b17', 'uploader_id': '2323agoy', 'title': '20090412陽明山二子坪-1', } }, { 'url': 'http://mymedia.yam.com/m/3598173', 'info_dict': { 'id': '3598173', 'ext': 'mp4', }, 'skip': 'cause Yam system error', }, { 'url': 'http://mymedia.yam.com/m/3599437', 'info_dict': { 'id': '3599437', 'ext': 'mp4', }, 'skip': 'invalid YouTube URL', }] def _real_extract(self, url): video_id = self._match_id(url) page = self._download_webpage(url, video_id) # Check for errors system_msg = self._html_search_regex( r'系統訊息(?:
|\n|\r)*([^<>]+)
', page, 'system message', default=None) if system_msg: raise ExtractorError(system_msg, expected=True) # Is it hosted externally on YouTube? youtube_url = self._html_search_regex( r':[\n ]+(?P[A-Z][a-z]{2}) ' + r'(?P\d{1,2}), (?P\d{4})', page) if mobj: upload_date = '%s%02d%02d' % ( mobj.group('year'), month_by_abbreviation(mobj.group('mon')), int(mobj.group('day'))) else: upload_date = None duration = float_or_none(api_result_obj['totaltime'][0], scale=1000) return { 'id': video_id, 'url': api_result_obj['mp3file'][0], 'title': self._html_search_meta('description', page), 'duration': duration, 'uploader_id': uploader_id, 'upload_date': upload_date, }