From c364f15ff1b86a0068cbec4f6782a4baf7a06152 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 19 Jul 2013 09:43:43 +0200 Subject: [PATCH] Add WeiboIE (closes #1039) It just embeds videos from other sites. Modified the _VALID_URL of Youku to catch embed URLs. --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/weibo.py | 37 ++++++++++++++++++++++++++++++++ youtube_dl/extractor/youku.py | 2 +- 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/weibo.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 30d55d4460..749fa65584 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -73,6 +73,7 @@ from .vimeo import VimeoIE from .vine import VineIE from .c56 import C56IE from .wat import WatIE +from .weibo import WeiboIE from .wimp import WimpIE from .worldstarhiphop import WorldStarHipHopIE from .xhamster import XHamsterIE diff --git a/youtube_dl/extractor/weibo.py b/youtube_dl/extractor/weibo.py new file mode 100644 index 0000000000..efcb5912b4 --- /dev/null +++ b/youtube_dl/extractor/weibo.py @@ -0,0 +1,37 @@ +# coding: utf-8 + +import re + +from .common import InfoExtractor + +class WeiboIE(InfoExtractor): + """ + The videos in Weibo come from different sites, this IE just finds the link + to the external video and returns it. 
+ """ + _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm' + + _TEST = { + u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm', + u'file': u'98322879.flv', + u'info_dict': { + u'title': u'魔声耳机最新广告“All Eyes On Us”', + }, + u'note': u'Sina video', + u'params': { + u'skip_download': True, + }, + } + + # Additional example videos from different sites + # Youku: http://video.weibo.com/v/weishipin/t_zQGDWQ8.htm + # 56.com: http://video.weibo.com/v/weishipin/t_zQ44HxN.htm + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + player_url = self._search_regex(r'var defaultPlayer="(.+?)"', webpage, + u'player url') + return self.url_result(player_url) + diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index eb98298019..996d384784 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -13,7 +13,7 @@ from ..utils import ( class YoukuIE(InfoExtractor): - _VALID_URL = r'(?:http://)?v\.youku\.com/v_show/id_(?P[A-Za-z0-9]+)\.html' + _VALID_URL = r'(?:http://)?(v|player)\.youku\.com/(v_show/id_|player\.php/sid/)(?P[A-Za-z0-9]+)(\.html|/v.swf)' _TEST = { u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html", u"file": u"XNDgyMDQ2NTQw_part00.flv",