From c6ddbdb66c5d6ead5e198013c54ef53d641063f1 Mon Sep 17 00:00:00 2001 From: Duncan Date: Sun, 10 May 2015 12:30:07 +1200 Subject: [PATCH] [voicerepublic] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/voicerepublic.py | 55 +++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/voicerepublic.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f117578a26..5cb3c304d1 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -634,6 +634,7 @@ from .vk import ( VKUserVideosIE, ) from .vodlocker import VodlockerIE +from .voicerepublic import VoiceRepublicIE from .vporn import VpornIE from .vrt import VRTIE from .vube import VubeIE diff --git a/youtube_dl/extractor/voicerepublic.py b/youtube_dl/extractor/voicerepublic.py new file mode 100644 index 0000000000..1a90693cb7 --- /dev/null +++ b/youtube_dl/extractor/voicerepublic.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..compat import ( + compat_urllib_request, +) + + +class VoiceRepublicIE(InfoExtractor): + _VALID_URL = r'https?://voicerepublic\.com/talks/(?P<id>[0-9a-z-]+)' + _TEST = { + 'url': 'https://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state', + 'md5': '0554a24d1657915aa8e8f84e15dc9353', + 'info_dict': { + 'id': '2296', + 'ext': 'm4a', + 'title': 'Watching the Watchers: Building a Sousveillance State', + 'thumbnail': 'https://voicerepublic.com/system/flyer/2296.png', + 'description': 'md5:715ba964958afa2398df615809cfecb1', + 'creator': 'M. C. 
McGrath', + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + req = compat_urllib_request.Request(url) + # Older versions of Firefox get redirected to an "upgrade browser" page + req.add_header('User-Agent', 'youtube-dl') + webpage = self._download_webpage(req, display_id) + thumbnail = self._og_search_thumbnail(webpage) + video_id = self._search_regex(r'/(\d+)\.png', thumbnail, 'id') + + if '
', webpage, 'author', fatal=False), + }