From b2758123c5e759fdb0c7d23d380e4dd9e245cd4a Mon Sep 17 00:00:00 2001
From: Remita Amine <remitamine@gmail.com>
Date: Wed, 19 Oct 2016 16:22:40 +0100
Subject: [PATCH] add Basic support for Smooth Streaming protocol(#8118)

---
 youtube_dl/YoutubeDL.py           |   2 +-
 youtube_dl/downloader/__init__.py |   2 +
 youtube_dl/downloader/ism.py      | 273 ++++++++++++++++++++++++++++++
 youtube_dl/extractor/common.py    | 101 +++++++++++
 4 files changed, 377 insertions(+), 1 deletion(-)
 create mode 100644 youtube_dl/downloader/ism.py

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 99825e343e..53f20ac2cb 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1658,7 +1658,7 @@ class YoutubeDL(object):
                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
                         if video_ext and audio_ext:
                             COMPATIBLE_EXTS = (
-                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
+                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
                                 ('webm')
                             )
                             for exts in COMPATIBLE_EXTS:
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py
index 817591d97e..16952e359b 100644
--- a/youtube_dl/downloader/__init__.py
+++ b/youtube_dl/downloader/__init__.py
@@ -7,6 +7,7 @@ from .http import HttpFD
 from .rtmp import RtmpFD
 from .dash import DashSegmentsFD
 from .rtsp import RtspFD
+from .ism import IsmFD
 from .external import (
     get_external_downloader,
     FFmpegFD,
@@ -24,6 +25,7 @@ PROTOCOL_MAP = {
     'rtsp': RtspFD,
     'f4m': F4mFD,
     'http_dash_segments': DashSegmentsFD,
+    'ism': IsmFD,
 }
 
 
diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py
new file mode 100644
index 0000000000..9f8f14b669
--- /dev/null
+++ b/youtube_dl/downloader/ism.py
@@ -0,0 +1,273 @@
+from __future__ import unicode_literals
+
+import os
+import time
+import struct
+import binascii
+import io
+
+from .fragment import FragmentFD
+from ..compat import compat_urllib_error
+from ..utils import (
+    sanitize_open,
+    encodeFilename,
+)
+
+
+u8 = struct.Struct(b'>B')
+u88 = struct.Struct(b'>Bx')
+u16 = struct.Struct(b'>H')
+u1616 = struct.Struct(b'>Hxx')
+u32 = struct.Struct(b'>I')
+u64 = struct.Struct(b'>Q')
+
+s88 = struct.Struct(b'>bx')
+s16 = struct.Struct(b'>h')
+s1616 = struct.Struct(b'>hxx')
+s32 = struct.Struct(b'>i')
+
+unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
+
+TRACK_ENABLED = 0x1
+TRACK_IN_MOVIE = 0x2
+TRACK_IN_PREVIEW = 0x4
+
+SELF_CONTAINED = 0x1
+
+
+def box(box_type, payload):
+    return u32.pack(8 + len(payload)) + box_type + payload
+
+
+def full_box(box_type, version, flags, payload):
+    return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload)
+
+
+def write_piff_header(stream, params):
+    track_id = params['track_id']
+    fourcc = params['fourcc']
+    duration = params['duration']
+    timescale = params.get('timescale', 10000000)
+    language = params.get('language', 'und')
+    height = params.get('height', 0)
+    width = params.get('width', 0)
+    is_audio = width == 0 and height == 0
+    creation_time = modification_time = int(time.time())
+
+    ftyp_payload = b'isml'  # major brand
+    ftyp_payload += u32.pack(1)  # minor version
+    ftyp_payload += b'piff' + b'iso2'  # compatible brands
+    stream.write(box(b'ftyp', ftyp_payload))  # File Type Box
+
+    mvhd_payload = u64.pack(creation_time)
+    mvhd_payload += u64.pack(modification_time)
+    mvhd_payload += u32.pack(timescale)
+    mvhd_payload += u64.pack(duration)
+    mvhd_payload += s1616.pack(1)  # rate
+    mvhd_payload += s88.pack(1)  # volume
+    mvhd_payload += u16.pack(0)  # reserved
+    mvhd_payload += u32.pack(0) * 2  # reserved
+    mvhd_payload += unity_matrix
+    mvhd_payload += u32.pack(0) * 6  # pre defined
+    mvhd_payload += u32.pack(0xffffffff)  # next track id
+    moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload)  # Movie Header Box
+
+    tkhd_payload = u64.pack(creation_time)
+    tkhd_payload += u64.pack(modification_time)
+    tkhd_payload += u32.pack(track_id)  # track id
+    tkhd_payload += u32.pack(0)  # reserved
+    tkhd_payload += u64.pack(duration)
+    tkhd_payload += u32.pack(0) * 2  # reserved
+    tkhd_payload += s16.pack(0)  # layer
+    tkhd_payload += s16.pack(0)  # alternate group
+    tkhd_payload += s88.pack(1 if is_audio else 0)  # volume
+    tkhd_payload += u16.pack(0)  # reserved
+    tkhd_payload += unity_matrix
+    tkhd_payload += u1616.pack(width)
+    tkhd_payload += u1616.pack(height)
+    trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload)  # Track Header Box
+
+    mdhd_payload = u64.pack(creation_time)
+    mdhd_payload += u64.pack(modification_time)
+    mdhd_payload += u32.pack(timescale)
+    mdhd_payload += u64.pack(duration)
+    mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60))
+    mdhd_payload += u16.pack(0)  # pre defined
+    mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload)  # Media Header Box
+
+    hdlr_payload = u32.pack(0)  # pre defined
+    hdlr_payload += b'soun' if is_audio else b'vide'  # handler type
+    hdlr_payload += u32.pack(0) * 3  # reserved
+    hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0'  # name
+    mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload)  # Handler Reference Box
+
+    if is_audio:
+        smhd_payload = s88.pack(0)  # balance
+        smhd_payload = u16.pack(0)  # reserved
+        media_header_box = full_box(b'smhd', 0, 0, smhd_payload)  # Sound Media Header
+    else:
+        vmhd_payload = u16.pack(0)  # graphics mode
+        vmhd_payload += u16.pack(0) * 3  # opcolor
+        media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload)  # Video Media Header
+    minf_payload = media_header_box
+
+    dref_payload = u32.pack(1)  # entry count
+    dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'')  # Data Entry URL Box
+    dinf_payload = full_box(b'dref', 0, 0, dref_payload)  # Data Reference Box
+    minf_payload += box(b'dinf', dinf_payload)  # Data Information Box
+
+    stsd_payload = u32.pack(1)  # entry count
+
+    sample_entry_payload = u8.pack(0) * 6  # reserved
+    sample_entry_payload += u16.pack(1)  # data reference index
+    if is_audio:
+        sample_entry_payload += u32.pack(0) * 2  # reserved
+        sample_entry_payload += u16.pack(params.get('channels', 2))
+        sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
+        sample_entry_payload += u16.pack(0)  # pre defined
+        sample_entry_payload += u16.pack(0)  # reserved
+        sample_entry_payload += u1616.pack(params['sampling_rate'])
+
+        if fourcc == 'AACL':
+            smaple_entry_box = box(b'mp4a', sample_entry_payload)
+    else:
+        sample_entry_payload = sample_entry_payload
+        sample_entry_payload += u16.pack(0)  # pre defined
+        sample_entry_payload += u16.pack(0)  # reserved
+        sample_entry_payload += u32.pack(0) * 3  # pre defined
+        sample_entry_payload += u16.pack(width)
+        sample_entry_payload += u16.pack(height)
+        sample_entry_payload += u1616.pack(0x48)  # horiz resolution 72 dpi
+        sample_entry_payload += u1616.pack(0x48)  # vert resolution 72 dpi
+        sample_entry_payload += u32.pack(0)  # reserved
+        sample_entry_payload += u16.pack(1)  # frame count
+        sample_entry_payload += u8.pack(0) * 32  # compressor name
+        sample_entry_payload += u16.pack(0x18)  # depth
+        sample_entry_payload += s16.pack(-1)  # pre defined
+
+        codec_private_data = binascii.unhexlify(params['codec_private_data'])
+        if fourcc in ('H264', 'AVC1'):
+            sps, pps = codec_private_data.split(u32.pack(1))[1:]
+            avcc_payload = u8.pack(1)  # configuration version
+            avcc_payload += sps[1]  # avc profile indication
+            avcc_payload += sps[2]  # profile compatibility
+            avcc_payload += sps[3]  # avc level indication
+            avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1))  # complete represenation (1) + reserved (11111) + length size minus one
+            avcc_payload += u8.pack(1)  # reserved (0) + number of sps (0000001)
+            avcc_payload += u16.pack(len(sps))
+            avcc_payload += sps
+            avcc_payload += u8.pack(1)  # number of pps
+            avcc_payload += u16.pack(len(pps))
+            avcc_payload += pps
+            sample_entry_payload += box(b'avcC', avcc_payload)  # AVC Decoder Configuration Record
+            smaple_entry_box = box(b'avc1', sample_entry_payload)  # AVC Simple Entry
+    stsd_payload += smaple_entry_box
+
+    stbl_payload = full_box(b'stsd', 0, 0, stsd_payload)  # Sample Description Box
+
+    stts_payload = u32.pack(0)  # entry count
+    stbl_payload += full_box(b'stts', 0, 0, stts_payload)  # Decoding Time to Sample Box
+
+    stsc_payload = u32.pack(0)  # entry count
+    stbl_payload += full_box(b'stsc', 0, 0, stsc_payload)  # Sample To Chunk Box
+
+    stco_payload = u32.pack(0)  # entry count
+    stbl_payload += full_box(b'stco', 0, 0, stco_payload)  # Chunk Offset Box
+
+    minf_payload += box(b'stbl', stbl_payload)  # Sample Table Box
+
+    mdia_payload += box(b'minf', minf_payload)  # Media Information Box
+
+    trak_payload += box(b'mdia', mdia_payload)  # Media Box
+
+    moov_payload += box(b'trak', trak_payload)  # Track Box
+
+    mehd_payload = u64.pack(duration)
+    mvex_payload = full_box(b'mehd', 1, 0, mehd_payload)  # Movie Extends Header Box
+
+    trex_payload = u32.pack(track_id)  # track id
+    trex_payload += u32.pack(1)  # default sample description index
+    trex_payload += u32.pack(0)  # default sample duration
+    trex_payload += u32.pack(0)  # default sample size
+    trex_payload += u32.pack(0)  # default sample flags
+    mvex_payload += full_box(b'trex', 0, 0, trex_payload)  # Track Extends Box
+
+    moov_payload += box(b'mvex', mvex_payload)  # Movie Extends Box
+    stream.write(box(b'moov', moov_payload))  # Movie Box
+
+
+def extract_box_data(data, box_sequence):
+    data_reader = io.BytesIO(data)
+    while True:
+        box_size = u32.unpack(data_reader.read(4))[0]
+        box_type = data_reader.read(4)
+        if box_type == box_sequence[0]:
+            box_data = data_reader.read(box_size - 8)
+            if len(box_sequence) == 1:
+                return box_data
+            return extract_box_data(box_data, box_sequence[1:])
+        data_reader.seek(box_size - 8, 1)
+
+
+class IsmFD(FragmentFD):
+    """
+    Download segments in a ISM manifest
+    """
+
+    FD_NAME = 'ism'
+
+    def real_download(self, filename, info_dict):
+        segments = info_dict['fragments'][:1] if self.params.get(
+            'test', False) else info_dict['fragments']
+
+        ctx = {
+            'filename': filename,
+            'total_frags': len(segments),
+        }
+
+        self._prepare_and_start_frag_download(ctx)
+
+        segments_filenames = []
+
+        fragment_retries = self.params.get('fragment_retries', 0)
+        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+
+        track_written = False
+        for i, segment in enumerate(segments):
+            segment_url = segment['url']
+            segment_name = 'Frag%d' % i
+            target_filename = '%s-%s' % (ctx['tmpfilename'], segment_name)
+            count = 0
+            while count <= fragment_retries:
+                try:
+                    success = ctx['dl'].download(target_filename, {'url': segment_url})
+                    if not success:
+                        return False
+                    down, target_sanitized = sanitize_open(target_filename, 'rb')
+                    down_data = down.read()
+                    if not track_written:
+                        tfhd_data = extract_box_data(down_data, [b'moof', b'traf', b'tfhd'])
+                        info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
+                        write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
+                        track_written = True
+                    ctx['dest_stream'].write(down_data)
+                    down.close()
+                    segments_filenames.append(target_sanitized)
+                    break
+                except compat_urllib_error.HTTPError as err:
+                    count += 1
+                    if count <= fragment_retries:
+                        self.report_retry_fragment(err, segment_name, count, fragment_retries)
+            if count > fragment_retries:
+                if skip_unavailable_fragments:
+                    self.report_skip_fragment(segment_name)
+                    continue
+                self.report_error('giving up after %s fragment retries' % fragment_retries)
+                return False
+
+        self._finish_frag_download(ctx)
+
+        for segment_file in segments_filenames:
+            os.remove(encodeFilename(segment_file))
+
+        return True
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 68b325fca3..2e9f05ae3b 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1780,6 +1780,107 @@ class InfoExtractor(object):
                         self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
         return formats
 
+    def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
+        res = self._download_webpage_handle(
+            ism_url, video_id,
+            note=note or 'Downloading ISM manifest',
+            errnote=errnote or 'Failed to download ISM manifest',
+            fatal=fatal)
+        if res is False:
+            return []
+        ism, urlh = res
+
+        return self._parse_ism_formats(
+            compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
+
+    def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
+        if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
+            return []
+
+        ism_base_url = re.match(r'https?://.+/', ism_url).group()
+
+        duration = int(ism_doc.attrib['Duration'])
+        timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
+
+        formats = []
+        for stream in ism_doc.findall('StreamIndex'):
+            stream_type = stream.get('Type')
+            if stream_type not in ('video', 'audio'):
+                continue
+            url_pattern = stream.attrib['Url']
+            stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
+            stream_name = stream.get('Name')
+            for track in stream.findall('QualityLevel'):
+                fourcc = track.get('FourCC')
+                # TODO: add support for WVC1 and WMAP
+                if fourcc not in ('H264', 'AVC1', 'AACL'):
+                    self.report_warning('%s is not a supported codec' % fourcc)
+                    continue
+                tbr = int(track.attrib['Bitrate']) // 1000
+                width = int_or_none(track.get('MaxWidth'))
+                height = int_or_none(track.get('MaxHeight'))
+                sampling_rate = int_or_none(track.get('SamplingRate'))
+
+                track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
+                track_url_pattern = compat_urlparse.urljoin(ism_url, track_url_pattern)
+
+                fragments = []
+                fragment_ctx = {
+                    'time': 0,
+                }
+                stream_fragments = stream.findall('c')
+                for stream_fragment_index, stream_fragment in enumerate(stream_fragments):
+                    fragment_ctx['time'] = int_or_none(stream_fragment.get('t')) or fragment_ctx['time']
+                    fragment_repeat = int_or_none(stream_fragment.get('r')) or 1
+                    fragment_ctx['duration'] = int_or_none(stream_fragment.get('d'))
+                    if not fragment_ctx['duration']:
+                        try:
+                            next_fragment_time = int(stream_fragment[stream_fragment_index + 1].attrib['t'])
+                        except IndexError:
+                            next_fragment_time = duration
+                        fragment_ctx['duration'] = (next_fragment_time - frgament_time) / fragment_repeat
+                    for _ in range(fragment_repeat):
+                        fragments.append({
+                            'url': re.sub(r'{start[ _]time}', str(fragment_ctx['time']), track_url_pattern),
+                            'duration': fragment_ctx['duration'] / stream_timescale,
+                        })
+                        fragment_ctx['time'] += fragment_ctx['duration']
+
+                format_id = []
+                if ism_id:
+                    format_id.append(ism_id)
+                if stream_name:
+                    format_id.append(stream_name)
+                format_id.append(compat_str(tbr))
+
+                formats.append({
+                    'format_id': '-'.join(format_id),
+                    'url': ism_url,
+                    'manifest_url': ism_url,
+                    'ext': 'ismv' if stream_type == 'video' else 'isma',
+                    'width': width,
+                    'height': height,
+                    'tbr': tbr,
+                    'asr': sampling_rate,
+                    'vcodec': 'none' if stream_type == 'audio' else fourcc,
+                    'acodec': 'none' if stream_type == 'video' else fourcc,
+                    'protocol': 'ism',
+                    'fragments': fragments,
+                    '_download_params': {
+                        'duration': duration,
+                        'timescale': stream_timescale,
+                        'width': width or 0,
+                        'height': height or 0,
+                        'fourcc': fourcc,
+                        'codec_private_data': track.get('CodecPrivateData'),
+                        'sampling_rate': sampling_rate,
+                        'channels': int_or_none(track.get('Channels', 2)),
+                        'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
+                        'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
+                    },
+                })
+        return formats
+
     def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'):
         def absolute_url(video_url):
             return compat_urlparse.urljoin(base_url, video_url)