From 66f4c04e50d9213522095247666d3d90345ad5d1 Mon Sep 17 00:00:00 2001 From: The Hatsune Daishi Date: Wed, 8 Dec 2021 01:38:50 +0900 Subject: [PATCH] [extractor] Add `_search_nuxt_data` (#1921) Authored by: nao20010128nao --- yt_dlp/extractor/common.py | 18 ++++++++++++++++++ yt_dlp/extractor/sovietscloset.py | 13 +------------ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 2180f879ce..d8fc5272c1 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1513,6 +1513,24 @@ class InfoExtractor(object): webpage, 'next.js data', **kw), video_id, **kw) + def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__'): + ''' Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function. ''' + # not all website do this, but it can be changed + # https://stackoverflow.com/questions/67463109/how-to-change-or-hide-nuxt-and-nuxt-keyword-in-page-source + rectx = re.escape(context_name) + js, arg_keys, arg_vals = self._search_regex( + (r'' % rectx, + r'%s\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)' % rectx), + webpage, context_name, group=['js', 'arg_keys', 'arg_vals']) + + args = dict(zip(arg_keys.split(','), arg_vals.split(','))) + + for key, val in args.items(): + if val in ('undefined', 'void 0'): + args[key] = 'null' + + return self._parse_json(js_to_json(js, args), video_id)['data'][0] + @staticmethod def _hidden_inputs(html): html = re.sub(r'', '', html) diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py index 7df23759ab..daf1c74503 100644 --- a/yt_dlp/extractor/sovietscloset.py +++ b/yt_dlp/extractor/sovietscloset.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( - js_to_json, try_get, unified_timestamp ) @@ -14,17 +13,7 @@ class SovietsClosetBaseIE(InfoExtractor): def parse_nuxt_jsonp(self, nuxt_jsonp_url, video_id, 
name): nuxt_jsonp = self._download_webpage(nuxt_jsonp_url, video_id, note=f'Downloading {name} __NUXT_JSONP__') - js, arg_keys, arg_vals = self._search_regex( - r'__NUXT_JSONP__\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)', - nuxt_jsonp, '__NUXT_JSONP__', group=['js', 'arg_keys', 'arg_vals']) - - args = dict(zip(arg_keys.split(','), arg_vals.split(','))) - - for key, val in args.items(): - if val in ('undefined', 'void 0'): - args[key] = 'null' - - return self._parse_json(js_to_json(js, args), video_id)['data'][0] + return self._search_nuxt_data(nuxt_jsonp, video_id, '__NUXT_JSONP__') def video_meta(self, video_id, game_name, category_name, episode_number, stream_date): title = game_name