From b1aaf1c07fae6f6e5d4bc966016f5e5df7b6c83c Mon Sep 17 00:00:00 2001 From: Jertzukka Date: Thu, 9 Dec 2021 14:25:30 +0200 Subject: [PATCH] [gofile] Add extractor (#1850) Closes #1831 Authored by: Jertzukka, Ashish0804 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/gofile.py | 83 ++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 yt_dlp/extractor/gofile.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index e280c71ce0..8d7c54ec4f 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -523,6 +523,7 @@ from .globo import ( ) from .go import GoIE from .godtube import GodTubeIE +from .gofile import GofileIE from .golem import GolemIE from .googledrive import GoogleDriveIE from .googlepodcasts import ( diff --git a/yt_dlp/extractor/gofile.py b/yt_dlp/extractor/gofile.py new file mode 100644 index 0000000000..62d778cfec --- /dev/null +++ b/yt_dlp/extractor/gofile.py @@ -0,0 +1,83 @@ +# coding: utf-8 +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + try_get +) + + +class GofileIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gofile\.io/d/(?P[^/]+)' + _TESTS = [{ + 'url': 'https://gofile.io/d/AMZyDw', + 'info_dict': { + 'id': 'AMZyDw', + }, + 'playlist_mincount': 2, + 'playlist': [{ + 'info_dict': { + 'id': 'de571ac1-5edc-42e2-8ec2-bdac83ad4a31', + 'filesize': 928116, + 'ext': 'mp4', + 'title': 'nuuh' + } + }] + }, { # URL to test mixed file types + 'url': 'https://gofile.io/d/avt34h', + 'info_dict': { + 'id': 'avt34h', + }, + 'playlist_mincount': 1, + }, { # URL to test no video/audio error + 'url': 'https://gofile.io/d/aB03lZ', + 'info_dict': { + 'id': 'aB03lZ', + }, + 'playlist_count': 0, + 'skip': 'No video/audio found at provided URL.', + }] + _TOKEN = None + + def _real_initialize(self): + token = self._get_cookies('https://gofile.io/').get('accountToken') + if token: + self._TOKEN = token.value + return + + account_data = self._download_json( + 'https://api.gofile.io/createAccount', None, note='Getting a new guest account') + self._TOKEN = account_data['data']['token'] + self._set_cookie('gofile.io', 'accountToken', self._TOKEN) + + def _entries(self, file_id): + files = self._download_json( + f'https://api.gofile.io/getContent?contentId={file_id}&token={self._TOKEN}&websiteToken=websiteToken&cache=true', + 'Gofile', note='Getting filelist') + + status = files['status'] + if status != 'ok': + raise ExtractorError(f'{self.IE_NAME} said: status {status}', expected=True) + + found_files = False + for file in (try_get(files, lambda x: x['data']['contents'], dict) or {}).values(): + file_type, file_format = file.get('mimetype').split('/', 1) + if file_type not in ('video', 'audio') and file_format != 'vnd.mts': + continue + + found_files = True + file_url = file.get('directLink') + if file_url: + yield { + 'id': file['id'], + 'title': file['name'].rsplit('.', 1)[0], + 'url': file_url, + 'filesize': file.get('size'), + 'release_timestamp': file.get('createTime') + } + + if not found_files: + raise ExtractorError('No video/audio found at provided URL.', expected=True) + + def _real_extract(self, url): + file_id = self._match_id(url) + return self.playlist_result(self._entries(file_id), playlist_id=file_id)