mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-15 21:53:21 +00:00
Compare commits
No commits in common. "e3e606de12ea138825754290542559b888f72bb5" and "a904a7f8c6edc42046f0a78fb279739d500d4887" have entirely different histories.
e3e606de12
...
a904a7f8c6
@ -1161,11 +1161,14 @@ Note that options in configuration file are just the same options aka switches u
|
|||||||
|
|
||||||
You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded.
|
You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded.
|
||||||
|
|
||||||
### Config file encoding
|
### Specifying encoding of config files
|
||||||
|
|
||||||
The config files are decoded according to the UTF BOM if one is present, and using the encoding from the system locale otherwise.
|
By default, config files are read in the encoding from system locale.
|
||||||
|
If you saved your config file in a different encoding, you may add `# coding: ENCODING` at the beginning of the file (e.g. `# coding: shift-jis`).
|
||||||
|
|
||||||
If you want your file to be decoded differently, add `# coding: ENCODING` to the beginning of the file (e.g. `# coding: shift-jis`). There must be no characters before that, not even spaces or a BOM.
|
There must not be any characters before that, including spaces.
|
||||||
|
|
||||||
|
If the file starts with a BOM, the encoding indicated by the BOM is used instead.
|
||||||
|
|
||||||
### Authentication with `.netrc` file
|
### Authentication with `.netrc` file
|
||||||
|
|
||||||
|
@ -1831,16 +1831,24 @@ Line 1
|
|||||||
self.assertEqual(determine_file_encoding(b'\x00\x00\xfe\xff'), ('utf-32-be', 4))
|
self.assertEqual(determine_file_encoding(b'\x00\x00\xfe\xff'), ('utf-32-be', 4))
|
||||||
self.assertEqual(determine_file_encoding(b'\xff\xfe'), ('utf-16-le', 2))
|
self.assertEqual(determine_file_encoding(b'\xff\xfe'), ('utf-16-le', 2))
|
||||||
|
|
||||||
self.assertEqual(determine_file_encoding(b'\xff\xfe# coding: utf-8\n--verbose'), ('utf-16-le', 2))
|
self.assertEqual(determine_file_encoding(b'# -*- coding: cp932 -*-'), ('cp932', 0))
|
||||||
|
self.assertEqual(determine_file_encoding(b'# -*- coding: cp932 -*-\n'), ('cp932', 0))
|
||||||
|
self.assertEqual(determine_file_encoding(b'# -*- coding: cp932 -*-\r\n'), ('cp932', 0))
|
||||||
|
|
||||||
self.assertEqual(determine_file_encoding(b'# coding: utf-8\n--verbose'), ('utf-8', 0))
|
self.assertEqual(determine_file_encoding(b'# coding: utf-8\n--verbose'), ('utf-8', 0))
|
||||||
self.assertEqual(determine_file_encoding(b'# coding: someencodinghere-12345\n--verbose'), ('someencodinghere-12345', 0))
|
self.assertEqual(determine_file_encoding(b'# coding: someencodinghere-12345\n--verbose'), ('someencodinghere-12345', 0))
|
||||||
|
|
||||||
self.assertEqual(determine_file_encoding(b'#coding:utf-8\n--verbose'), ('utf-8', 0))
|
self.assertEqual(determine_file_encoding(b'# vi: set fileencoding=cp932'), ('cp932', 0))
|
||||||
self.assertEqual(determine_file_encoding(b'# coding: utf-8 \r\n--verbose'), ('utf-8', 0))
|
self.assertEqual(determine_file_encoding(b'# vi: set fileencoding=cp932\n'), ('cp932', 0))
|
||||||
|
self.assertEqual(determine_file_encoding(b'# vi: set fileencoding=cp932\r\n'), ('cp932', 0))
|
||||||
|
self.assertEqual(determine_file_encoding(b'# vi: set fileencoding=cp932,euc-jp\r\n'), ('cp932', 0))
|
||||||
|
|
||||||
self.assertEqual(determine_file_encoding('# coding: utf-32-be'.encode('utf-32-be')), ('utf-32-be', 0))
|
self.assertEqual(determine_file_encoding(
|
||||||
self.assertEqual(determine_file_encoding('# coding: utf-16-le'.encode('utf-16-le')), ('utf-16-le', 0))
|
b'\0\0\0#\0\0\0 \0\0\0c\0\0\0o\0\0\0d\0\0\0i\0\0\0n\0\0\0g\0\0\0:\0\0\0 \0\0\0u\0\0\0t\0\0\0f\0\0\0-\0\0\x003\0\0\x002\0\0\0-\0\0\0b\0\0\0e'),
|
||||||
|
('utf-32-be', 0))
|
||||||
|
self.assertEqual(determine_file_encoding(
|
||||||
|
b'#\0 \0c\0o\0d\0i\0n\0g\0:\0 \0u\0t\0f\0-\x001\x006\0-\0l\0e\0'),
|
||||||
|
('utf-16-le', 0))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -1,17 +1,17 @@
|
|||||||
import hashlib
|
|
||||||
import itertools
|
import itertools
|
||||||
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
import urllib.error
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_HTTPError,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
decode_base_n,
|
|
||||||
encode_base_n,
|
|
||||||
float_or_none,
|
|
||||||
format_field,
|
format_field,
|
||||||
|
float_or_none,
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
lowercase_escape,
|
lowercase_escape,
|
||||||
@ -22,18 +22,6 @@ from ..utils import (
|
|||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
_ENCODING_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
|
|
||||||
|
|
||||||
|
|
||||||
def _pk_to_id(id):
|
|
||||||
"""Source: https://stackoverflow.com/questions/24437823/getting-instagram-post-url-from-media-id"""
|
|
||||||
return encode_base_n(int(id.split('_')[0]), table=_ENCODING_CHARS)
|
|
||||||
|
|
||||||
|
|
||||||
def _id_to_pk(shortcode):
|
|
||||||
"""Covert a shortcode to a numeric value"""
|
|
||||||
return decode_base_n(shortcode[:11], table=_ENCODING_CHARS)
|
|
||||||
|
|
||||||
|
|
||||||
class InstagramBaseIE(InfoExtractor):
|
class InstagramBaseIE(InfoExtractor):
|
||||||
_NETRC_MACHINE = 'instagram'
|
_NETRC_MACHINE = 'instagram'
|
||||||
@ -168,15 +156,6 @@ class InstagramBaseIE(InfoExtractor):
|
|||||||
if isinstance(product_info, list):
|
if isinstance(product_info, list):
|
||||||
product_info = product_info[0]
|
product_info = product_info[0]
|
||||||
|
|
||||||
comment_data = traverse_obj(product_info, ('edge_media_to_parent_comment', 'edges'))
|
|
||||||
comments = [{
|
|
||||||
'author': traverse_obj(comment_dict, ('node', 'owner', 'username')),
|
|
||||||
'author_id': traverse_obj(comment_dict, ('node', 'owner', 'id')),
|
|
||||||
'id': traverse_obj(comment_dict, ('node', 'id')),
|
|
||||||
'text': traverse_obj(comment_dict, ('node', 'text')),
|
|
||||||
'timestamp': traverse_obj(comment_dict, ('node', 'created_at'), expected_type=int_or_none),
|
|
||||||
} for comment_dict in comment_data] if comment_data else None
|
|
||||||
|
|
||||||
user_info = product_info.get('user') or {}
|
user_info = product_info.get('user') or {}
|
||||||
info_dict = {
|
info_dict = {
|
||||||
'id': product_info.get('code') or product_info.get('id'),
|
'id': product_info.get('code') or product_info.get('id'),
|
||||||
@ -189,7 +168,6 @@ class InstagramBaseIE(InfoExtractor):
|
|||||||
'view_count': int_or_none(product_info.get('view_count')),
|
'view_count': int_or_none(product_info.get('view_count')),
|
||||||
'like_count': int_or_none(product_info.get('like_count')),
|
'like_count': int_or_none(product_info.get('like_count')),
|
||||||
'comment_count': int_or_none(product_info.get('comment_count')),
|
'comment_count': int_or_none(product_info.get('comment_count')),
|
||||||
'comments': comments,
|
|
||||||
'http_headers': {
|
'http_headers': {
|
||||||
'Referer': 'https://www.instagram.com/',
|
'Referer': 'https://www.instagram.com/',
|
||||||
}
|
}
|
||||||
@ -236,9 +214,23 @@ class InstagramIOSIE(InfoExtractor):
|
|||||||
'add_ie': ['Instagram']
|
'add_ie': ['Instagram']
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _get_id(self, id):
|
||||||
|
"""Source: https://stackoverflow.com/questions/24437823/getting-instagram-post-url-from-media-id"""
|
||||||
|
chrs = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
|
||||||
|
media_id = int(id.split('_')[0])
|
||||||
|
shortened_id = ''
|
||||||
|
while media_id > 0:
|
||||||
|
r = media_id % 64
|
||||||
|
media_id = (media_id - r) // 64
|
||||||
|
shortened_id = chrs[r] + shortened_id
|
||||||
|
return shortened_id
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = _pk_to_id(self._match_id(url))
|
return {
|
||||||
return self.url_result(f'http://instagram.com/tv/{video_id}', InstagramIE, video_id)
|
'_type': 'url_transparent',
|
||||||
|
'url': f'http://instagram.com/tv/{self._get_id(self._match_id(url))}/',
|
||||||
|
'ie_key': 'Instagram',
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class InstagramIE(InstagramBaseIE):
|
class InstagramIE(InstagramBaseIE):
|
||||||
@ -366,49 +358,39 @@ class InstagramIE(InstagramBaseIE):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id, url = self._match_valid_url(url).group('id', 'url')
|
video_id, url = self._match_valid_url(url).group('id', 'url')
|
||||||
general_info = self._download_json(
|
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||||
f'https://www.instagram.com/graphql/query/?query_hash=9f8827793ef34641b2fb195d4d41151c'
|
if 'www.instagram.com/accounts/login' in urlh.geturl():
|
||||||
f'&variables=%7B"shortcode":"{video_id}",'
|
self.report_warning('Main webpage is locked behind the login page. '
|
||||||
'"parent_comment_count":10,"has_threaded_comments":true}', video_id, fatal=False, errnote=False,
|
'Retrying with embed webpage (Note that some metadata might be missing)')
|
||||||
headers={
|
|
||||||
'Accept': '*',
|
|
||||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
|
|
||||||
'Authority': 'www.instagram.com',
|
|
||||||
'Referer': 'https://www.instagram.com',
|
|
||||||
'x-ig-app-id': '936619743392459',
|
|
||||||
})
|
|
||||||
media = traverse_obj(general_info, ('data', 'shortcode_media')) or {}
|
|
||||||
if not media:
|
|
||||||
self.report_warning('General metadata extraction failed', video_id)
|
|
||||||
|
|
||||||
info = self._download_json(
|
|
||||||
f'https://i.instagram.com/api/v1/media/{_id_to_pk(video_id)}/info/', video_id,
|
|
||||||
fatal=False, note='Downloading video info', errnote=False, headers={
|
|
||||||
'Accept': '*',
|
|
||||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
|
|
||||||
'Authority': 'www.instagram.com',
|
|
||||||
'Referer': 'https://www.instagram.com',
|
|
||||||
'x-ig-app-id': '936619743392459',
|
|
||||||
})
|
|
||||||
if info:
|
|
||||||
media.update(info['items'][0])
|
|
||||||
return self._extract_product(media)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
f'https://www.instagram.com/p/{video_id}/embed/', video_id,
|
'https://www.instagram.com/p/%s/embed/' % video_id, video_id, note='Downloading embed webpage')
|
||||||
note='Downloading embed webpage', fatal=False)
|
|
||||||
if not webpage:
|
|
||||||
self.raise_login_required('Requested content was not found, the content might be private')
|
|
||||||
|
|
||||||
additional_data = self._search_json(
|
shared_data = self._parse_json(
|
||||||
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*', webpage, 'additional data', video_id, fatal=False)
|
self._search_regex(
|
||||||
|
r'window\._sharedData\s*=\s*({.+?});',
|
||||||
|
webpage, 'shared data', default='{}'),
|
||||||
|
video_id, fatal=False)
|
||||||
|
media = traverse_obj(
|
||||||
|
shared_data,
|
||||||
|
('entry_data', 'PostPage', 0, 'graphql', 'shortcode_media'),
|
||||||
|
('entry_data', 'PostPage', 0, 'media'),
|
||||||
|
expected_type=dict)
|
||||||
|
|
||||||
|
# _sharedData.entry_data.PostPage is empty when authenticated (see
|
||||||
|
# https://github.com/ytdl-org/youtube-dl/pull/22880)
|
||||||
|
if not media:
|
||||||
|
additional_data = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\);',
|
||||||
|
webpage, 'additional data', default='{}'),
|
||||||
|
video_id, fatal=False)
|
||||||
product_item = traverse_obj(additional_data, ('items', 0), expected_type=dict)
|
product_item = traverse_obj(additional_data, ('items', 0), expected_type=dict)
|
||||||
if product_item:
|
if product_item:
|
||||||
media.update(product_item)
|
return self._extract_product(product_item)
|
||||||
return self._extract_product(media)
|
media = traverse_obj(additional_data, ('graphql', 'shortcode_media'), 'shortcode_media', expected_type=dict) or {}
|
||||||
|
|
||||||
media.update(traverse_obj(
|
if not media and 'www.instagram.com/accounts/login' in urlh.geturl():
|
||||||
additional_data, ('graphql', 'shortcode_media'), 'shortcode_media', expected_type=dict) or {})
|
self.raise_login_required('You need to log in to access this content')
|
||||||
|
|
||||||
username = traverse_obj(media, ('owner', 'username')) or self._search_regex(
|
username = traverse_obj(media, ('owner', 'username')) or self._search_regex(
|
||||||
r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', webpage, 'username', fatal=False)
|
r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', webpage, 'username', fatal=False)
|
||||||
@ -537,7 +519,7 @@ class InstagramPlaylistBaseIE(InstagramBaseIE):
|
|||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
# if it's an error caused by a bad query, and there are
|
# if it's an error caused by a bad query, and there are
|
||||||
# more GIS templates to try, ignore it and keep trying
|
# more GIS templates to try, ignore it and keep trying
|
||||||
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 403:
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
if gis_tmpl != gis_tmpls[-1]:
|
if gis_tmpl != gis_tmpls[-1]:
|
||||||
continue
|
continue
|
||||||
raise
|
raise
|
||||||
@ -647,36 +629,41 @@ class InstagramStoryIE(InstagramBaseIE):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
username, story_id = self._match_valid_url(url).groups()
|
username, story_id = self._match_valid_url(url).groups()
|
||||||
story_info = self._download_webpage(url, story_id)
|
|
||||||
user_info = self._search_json(r'"user":', story_info, 'user info', story_id, fatal=False)
|
|
||||||
if not user_info:
|
|
||||||
self.raise_login_required('This content is unreachable')
|
|
||||||
user_id = user_info.get('id')
|
|
||||||
|
|
||||||
story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
|
story_info_url = f'{username}/{story_id}/?__a=1' if username == 'highlights' else f'{username}/?__a=1'
|
||||||
videos = traverse_obj(self._download_json(
|
story_info = self._download_json(f'https://www.instagram.com/stories/{story_info_url}', story_id, headers={
|
||||||
f'https://i.instagram.com/api/v1/feed/reels_media/?reel_ids={story_info_url}',
|
|
||||||
story_id, errnote=False, fatal=False, headers={
|
|
||||||
'X-IG-App-ID': 936619743392459,
|
'X-IG-App-ID': 936619743392459,
|
||||||
'X-ASBD-ID': 198387,
|
'X-ASBD-ID': 198387,
|
||||||
'X-IG-WWW-Claim': 0,
|
'X-IG-WWW-Claim': 0,
|
||||||
}), 'reels')
|
'X-Requested-With': 'XMLHttpRequest',
|
||||||
if not videos:
|
'Referer': url,
|
||||||
self.raise_login_required('You need to log in to access this content')
|
})
|
||||||
|
user_id = story_info['user']['id']
|
||||||
|
highlight_title = traverse_obj(story_info, ('highlight', 'title'))
|
||||||
|
|
||||||
full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (str(user_id), 'user', 'full_name'))
|
story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
|
||||||
story_title = traverse_obj(videos, (f'highlight:{story_id}', 'title'))
|
videos = self._download_json(f'https://i.instagram.com/api/v1/feed/reels_media/?reel_ids={story_info_url}', story_id, headers={
|
||||||
if not story_title:
|
'X-IG-App-ID': 936619743392459,
|
||||||
story_title = f'Story by {username}'
|
'X-ASBD-ID': 198387,
|
||||||
|
'X-IG-WWW-Claim': 0,
|
||||||
|
})['reels']
|
||||||
|
|
||||||
|
full_name = traverse_obj(videos, ('user', 'full_name'))
|
||||||
|
|
||||||
|
user_info = {}
|
||||||
|
if not (username and username != 'highlights' and full_name):
|
||||||
|
user_info = self._download_json(
|
||||||
|
f'https://i.instagram.com/api/v1/users/{user_id}/info/', story_id, headers={
|
||||||
|
'User-Agent': 'Mozilla/5.0 (Linux; Android 11; SM-A505F Build/RP1A.200720.012; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/96.0.4664.45 Mobile Safari/537.36 Instagram 214.1.0.29.120 Android (30/11; 450dpi; 1080x2122; samsung; SM-A505F; a50; exynos9610; en_US; 333717274)',
|
||||||
|
}, note='Downloading user info')
|
||||||
|
|
||||||
|
username = traverse_obj(user_info, ('user', 'username')) or username
|
||||||
|
full_name = traverse_obj(user_info, ('user', 'full_name')) or full_name
|
||||||
|
|
||||||
highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items'))
|
highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items'))
|
||||||
info_data = []
|
return self.playlist_result([{
|
||||||
for highlight in highlights:
|
**self._extract_product(highlight),
|
||||||
highlight_data = self._extract_product(highlight)
|
'title': f'Story by {username}',
|
||||||
if highlight_data.get('formats'):
|
|
||||||
info_data.append({
|
|
||||||
**highlight_data,
|
|
||||||
'uploader': full_name,
|
'uploader': full_name,
|
||||||
'uploader_id': user_id,
|
'uploader_id': user_id,
|
||||||
})
|
} for highlight in highlights], playlist_id=story_id, playlist_title=highlight_title)
|
||||||
return self.playlist_result(info_data, playlist_id=story_id, playlist_title=story_title)
|
|
||||||
|
@ -3485,7 +3485,6 @@ def age_restricted(content_limit, age_limit):
|
|||||||
return age_limit < content_limit
|
return age_limit < content_limit
|
||||||
|
|
||||||
|
|
||||||
# List of known byte-order-marks (BOM)
|
|
||||||
BOMS = [
|
BOMS = [
|
||||||
(b'\xef\xbb\xbf', 'utf-8'),
|
(b'\xef\xbb\xbf', 'utf-8'),
|
||||||
(b'\x00\x00\xfe\xff', 'utf-32-be'),
|
(b'\x00\x00\xfe\xff', 'utf-32-be'),
|
||||||
@ -3493,6 +3492,7 @@ BOMS = [
|
|||||||
(b'\xff\xfe', 'utf-16-le'),
|
(b'\xff\xfe', 'utf-16-le'),
|
||||||
(b'\xfe\xff', 'utf-16-be'),
|
(b'\xfe\xff', 'utf-16-be'),
|
||||||
]
|
]
|
||||||
|
""" List of known byte-order-marks (BOM) """
|
||||||
|
|
||||||
|
|
||||||
def is_html(first_bytes):
|
def is_html(first_bytes):
|
||||||
@ -5398,20 +5398,37 @@ def read_stdin(what):
|
|||||||
|
|
||||||
def determine_file_encoding(data):
|
def determine_file_encoding(data):
|
||||||
"""
|
"""
|
||||||
Detect the text encoding used
|
From the first 512 bytes of a given file,
|
||||||
|
it tries to detect the encoding to be used to read as text.
|
||||||
|
|
||||||
@returns (encoding, bytes to skip)
|
@returns (encoding, bytes to skip)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# BOM marks are given priority over declarations
|
|
||||||
for bom, enc in BOMS:
|
for bom, enc in BOMS:
|
||||||
|
# matching BOM beats any declaration
|
||||||
|
# BOMs are skipped to prevent any errors
|
||||||
if data.startswith(bom):
|
if data.startswith(bom):
|
||||||
return enc, len(bom)
|
return enc, len(bom)
|
||||||
|
|
||||||
# Strip off all null bytes to match even when UTF-16 or UTF-32 is used.
|
# strip off all null bytes to match even when UTF-16 or UTF-32 is used
|
||||||
# We ignore the endianness to get a good enough match
|
# endians don't matter
|
||||||
data = data.replace(b'\0', b'')
|
data = data.replace(b'\0', b'')
|
||||||
mobj = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', data)
|
|
||||||
return mobj.group(1).decode() if mobj else None, 0
|
PREAMBLES = [
|
||||||
|
# "# -*- coding: utf-8 -*-"
|
||||||
|
# "# coding: utf-8"
|
||||||
|
rb'(?m)^#(?:\s+-\*-)?\s*coding\s*:\s*(?P<encoding>\S+)(?:\s+-\*-)?\s*$',
|
||||||
|
# "# vi: set fileencoding=utf-8"
|
||||||
|
rb'^#\s+vi\s*:\s+set\s+fileencoding=(?P<encoding>[^\s,]+)'
|
||||||
|
]
|
||||||
|
for pb in PREAMBLES:
|
||||||
|
mobj = re.match(pb, data)
|
||||||
|
if not mobj:
|
||||||
|
continue
|
||||||
|
# preambles aren't skipped since they're just ignored when reading as config
|
||||||
|
return mobj.group('encoding').decode(), 0
|
||||||
|
|
||||||
|
return None, 0
|
||||||
|
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
|
Loading…
Reference in New Issue
Block a user