release 2016.06.22

[svt] Various improvements
+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata
2026-01-25 00:00:04 -05:00 · 2016-06-22 23:43:24 +07:00 · 2016-06-22 23:36:07 +07:00 · 2016-06-22 12:52:15 +01:00 · 2016-06-21 22:31:41 +07:00 · 2016-06-21 13:37:57 +01:00
10 changed files with 174 additions and 129 deletions
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -6,8 +6,8 @@

 ---

-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.20*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.20**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.22**

 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2016.06.20
+[debug] youtube-dl version 2016.06.22
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -248,7 +248,6 @@
 - **Gamersyde**
 - **GameSpot**
 - **GameStar**
- - **Gametrailers**
 - **Gazeta**
 - **GDCVault**
 - **generic**: Generic downloader that works on some sites
--- a/youtube_dl/extractor/cbs.py
+++ b/youtube_dl/extractor/cbs.py
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals

-import re
-
 from .theplatform import ThePlatformFeedIE
 from ..utils import (
    int_or_none,
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -285,7 +285,6 @@ from .gameone import (
 from .gamersyde import GamersydeIE
 from .gamespot import GameSpotIE
 from .gamestar import GameStarIE
-from .gametrailers import GametrailersIE
 from .gazeta import GazetaIE
 from .gdcvault import GDCVaultIE
 from .generic import GenericIE
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -239,6 +239,8 @@ class FacebookIE(InfoExtractor):

        formats = []
        for format_id, f in video_data.items():
+            if f and isinstance(f, dict):
+                f = [f]
            if not f or not isinstance(f, list):
                continue
            for quality in ('sd', 'hd'):
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -1,19 +1,19 @@
 from __future__ import unicode_literals

 import re
-import json

-from .common import InfoExtractor
+from .once import OnceIE
 from ..compat import (
    compat_urllib_parse_unquote,
-    compat_urlparse,
 )
 from ..utils import (
    unescapeHTML,
+    url_basename,
+    dict_get,
 )


-class GameSpotIE(InfoExtractor):
+class GameSpotIE(OnceIE):
    _VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'
    _TESTS = [{
        'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
@@ -39,29 +39,73 @@ class GameSpotIE(InfoExtractor):
        webpage = self._download_webpage(url, page_id)
        data_video_json = self._search_regex(
            r'data-video=["\'](.*?)["\']', webpage, 'data video')
-        data_video = json.loads(unescapeHTML(data_video_json))
+        data_video = self._parse_json(unescapeHTML(data_video_json), page_id)
        streams = data_video['videoStreams']

+        manifest_url = None
        formats = []
        f4m_url = streams.get('f4m_stream')
-        if f4m_url is not None:
-            # Transform the manifest url to a link to the mp4 files
-            # they are used in mobile devices.
-            f4m_path = compat_urlparse.urlparse(f4m_url).path
-            QUALITIES_RE = r'((,\d+)+,?)'
-            qualities = self._search_regex(QUALITIES_RE, f4m_path, 'qualities').strip(',').split(',')
-            http_path = f4m_path[1:].split('/', 1)[1]
-            http_template = re.sub(QUALITIES_RE, r'%s', http_path)
-            http_template = http_template.replace('.csmil/manifest.f4m', '')
-            http_template = compat_urlparse.urljoin(
-                'http://video.gamespotcdn.com/', http_template)
-            for q in qualities:
-                formats.append({
-                    'url': http_template % q,
-                    'ext': 'mp4',
-                    'format_id': q,
-                })
-        else:
+        if f4m_url:
+            manifest_url = f4m_url
+            formats.extend(self._extract_f4m_formats(
+                f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False))
+        m3u8_url = streams.get('m3u8_stream')
+        if m3u8_url:
+            manifest_url = m3u8_url
+            m3u8_formats = self._extract_m3u8_formats(
+                m3u8_url, page_id, 'mp4', 'm3u8_native',
+                m3u8_id='hls', fatal=False)
+            formats.extend(m3u8_formats)
+        progressive_url = dict_get(
+            streams, ('progressive_hd', 'progressive_high', 'progressive_low'))
+        if progressive_url and manifest_url:
+            qualities_basename = self._search_regex(
+                '/([^/]+)\.csmil/',
+                manifest_url, 'qualities basename', default=None)
+            if qualities_basename:
+                QUALITIES_RE = r'((,\d+)+,?)'
+                qualities = self._search_regex(
+                    QUALITIES_RE, qualities_basename,
+                    'qualities', default=None)
+                if qualities:
+                    qualities = list(map(lambda q: int(q), qualities.strip(',').split(',')))
+                    qualities.sort()
+                    http_template = re.sub(QUALITIES_RE, r'%d', qualities_basename)
+                    http_url_basename = url_basename(progressive_url)
+                    if m3u8_formats:
+                        self._sort_formats(m3u8_formats)
+                        m3u8_formats = list(filter(
+                            lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+                            m3u8_formats))
+                    if len(qualities) == len(m3u8_formats):
+                        for q, m3u8_format in zip(qualities, m3u8_formats):
+                            f = m3u8_format.copy()
+                            f.update({
+                                'url': progressive_url.replace(
+                                    http_url_basename, http_template % q),
+                                'format_id': f['format_id'].replace('hls', 'http'),
+                                'protocol': 'http',
+                            })
+                            formats.append(f)
+                    else:
+                        for q in qualities:
+                            formats.append({
+                                'url': progressive_url.replace(
+                                    http_url_basename, http_template % q),
+                                'ext': 'mp4',
+                                'format_id': 'http-%d' % q,
+                                'tbr': q,
+                            })
+
+        onceux_json = self._search_regex(
+            r'data-onceux-options=["\'](.*?)["\']', webpage, 'data video', default=None)
+        if onceux_json:
+            onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
+            if onceux_url:
+                formats.extend(self._extract_once_formats(re.sub(
+                    r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url).replace('ads/vmap/', '')))
+
+        if not formats:
            for quality in ['sd', 'hd']:
                # It's actually a link to a flv file
                flv_url = streams.get('f4m_{0}'.format(quality))
@@ -71,6 +115,7 @@ class GameSpotIE(InfoExtractor):
                        'ext': 'flv',
                        'format_id': quality,
                    })
+        self._sort_formats(formats)

        return {
            'id': data_video['guid'],
--- a/youtube_dl/extractor/gametrailers.py
+++ b/youtube_dl/extractor/gametrailers.py
@@ -1,62 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    parse_age_limit,
-    url_basename,
-)
-
-
-class GametrailersIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)'
-
-    _TEST = {
-        'url': 'http://www.gametrailers.com/videos/view/gametrailers-com/116437-Just-Cause-3-Review',
-        'md5': 'f28c4efa0bdfaf9b760f6507955b6a6a',
-        'info_dict': {
-            'id': '2983958',
-            'ext': 'mp4',
-            'display_id': '116437-Just-Cause-3-Review',
-            'title': 'Just Cause 3 - Review',
-            'description': 'It\'s a lot of fun to shoot at things and then watch them explode in Just Cause 3, but should there be more to the experience than that?',
-        },
-    }
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        title = self._html_search_regex(
-            r'<title>(.+?)\|', webpage, 'title').strip()
-        embed_url = self._proto_relative_url(
-            self._search_regex(
-                r'src=\'(//embed.gametrailers.com/embed/[^\']+)\'', webpage,
-                'embed url'),
-            scheme='http:')
-        video_id = url_basename(embed_url)
-        embed_page = self._download_webpage(embed_url, video_id)
-        embed_vars_json = self._search_regex(
-            r'(?s)var embedVars = (\{.*?\})\s*</script>', embed_page,
-            'embed vars')
-        info = self._parse_json(embed_vars_json, video_id)
-
-        formats = []
-        for media in info['media']:
-            if media['mediaPurpose'] == 'play':
-                formats.append({
-                    'url': media['uri'],
-                    'height': media['height'],
-                    'width:': media['width'],
-                })
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'display_id': display_id,
-            'title': title,
-            'formats': formats,
-            'thumbnail': info.get('thumbUri'),
-            'description': self._og_search_description(webpage),
-            'duration': int_or_none(info.get('videoLengthInSeconds')),
-            'age_limit': parse_age_limit(info.get('audienceRating')),
-        }
--- a/youtube_dl/extractor/streamcloud.py
+++ b/youtube_dl/extractor/streamcloud.py
@@ -6,7 +6,6 @@ import re
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
-    sanitized_Request,
    urlencode_postdata,
 )

@@ -45,20 +44,26 @@ class StreamcloudIE(InfoExtractor):
            (?:id="[^"]+"\s+)?
            value="([^"]*)"
            ''', orig_webpage)
-        post = urlencode_postdata(fields)

        self._sleep(12, video_id)
-        headers = {
-            b'Content-Type': b'application/x-www-form-urlencoded',
-        }
-        req = sanitized_Request(url, post, headers)

        webpage = self._download_webpage(
-            req, video_id, note='Downloading video page ...')
-        title = self._html_search_regex(
-            r'<h1[^>]*>([^<]+)<', webpage, 'title')
-        video_url = self._search_regex(
-            r'file:\s*"([^"]+)"', webpage, 'video URL')
+            url, video_id, data=urlencode_postdata(fields), headers={
+                b'Content-Type': b'application/x-www-form-urlencoded',
+            })
+
+        try:
+            title = self._html_search_regex(
+                r'<h1[^>]*>([^<]+)<', webpage, 'title')
+            video_url = self._search_regex(
+                r'file:\s*"([^"]+)"', webpage, 'video URL')
+        except ExtractorError:
+            message = self._html_search_regex(
+                r'(?s)<div[^>]+class=(["\']).*?msgboxinfo.*?\1[^>]*>(?P<message>.+?)</div>',
+                webpage, 'message', default=None, group='message')
+            if message:
+                raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
+            raise
        thumbnail = self._search_regex(
            r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False)

--- a/youtube_dl/extractor/svt.py
+++ b/youtube_dl/extractor/svt.py
@@ -6,17 +6,14 @@ import re
 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
+    dict_get,
+    int_or_none,
+    try_get,
 )


 class SVTBaseIE(InfoExtractor):
-    def _extract_video(self, url, video_id):
-        info = self._download_json(url, video_id)
-
-        title = info['context']['title']
-        thumbnail = info['context'].get('thumbnailImage')
-
-        video_info = info['video']
+    def _extract_video(self, video_info, video_id):
        formats = []
        for vr in video_info['videoReferences']:
            player_type = vr.get('playerType')
@@ -40,27 +37,49 @@ class SVTBaseIE(InfoExtractor):
                    'format_id': player_type,
                    'url': vurl,
                })
+        if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
+            self.raise_geo_restricted('This video is only available in Sweden')
        self._sort_formats(formats)

        subtitles = {}
-        subtitle_references = video_info.get('subtitleReferences')
+        subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences'))
        if isinstance(subtitle_references, list):
            for sr in subtitle_references:
                subtitle_url = sr.get('url')
+                subtitle_lang = sr.get('language', 'sv')
                if subtitle_url:
-                    subtitles.setdefault('sv', []).append({'url': subtitle_url})
+                    if determine_ext(subtitle_url) == 'm3u8':
+                        # TODO(yan12125): handle WebVTT in m3u8 manifests
+                        continue

-        duration = video_info.get('materialLength')
-        age_limit = 18 if video_info.get('inappropriateForChildren') else 0
+                    subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url})
+
+        title = video_info.get('title')
+
+        series = video_info.get('programTitle')
+        season_number = int_or_none(video_info.get('season'))
+        episode = video_info.get('episodeTitle')
+        episode_number = int_or_none(video_info.get('episodeNumber'))
+
+        duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
+        age_limit = None
+        adult = dict_get(
+            video_info, ('inappropriateForChildren', 'blockedForChildren'),
+            skip_false_values=False)
+        if adult is not None:
+            age_limit = 18 if adult else 0

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
-            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': age_limit,
+            'series': series,
+            'season_number': season_number,
+            'episode': episode,
+            'episode_number': episode_number,
        }


@@ -68,11 +87,11 @@ class SVTIE(SVTBaseIE):
    _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
-        'md5': '9648197555fc1b49e3dc22db4af51d46',
+        'md5': '33e9a5d8f646523ce0868ecfb0eed77d',
        'info_dict': {
            'id': '2900353',
-            'ext': 'flv',
-            'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
+            'ext': 'mp4',
+            'title': 'Stjärnorna skojar till det - under SVT-intervjun',
            'duration': 27,
            'age_limit': 0,
        },
@@ -89,15 +108,20 @@ class SVTIE(SVTBaseIE):
        mobj = re.match(self._VALID_URL, url)
        widget_id = mobj.group('widget_id')
        article_id = mobj.group('id')
-        return self._extract_video(
+
+        info = self._download_json(
            'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id),
            article_id)

+        info_dict = self._extract_video(info['video'], article_id)
+        info_dict['title'] = info['context']['title']
+        return info_dict
+

 class SVTPlayIE(SVTBaseIE):
    IE_DESC = 'SVT Play and Öppet arkiv'
-    _VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
+    _TESTS = [{
        'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
        'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
        'info_dict': {
@@ -113,12 +137,47 @@ class SVTPlayIE(SVTBaseIE):
                }]
            },
        },
-    }
+    }, {
+        # geo restricted to Sweden
+        'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
+        'only_matching': True,
+    }]

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        host = mobj.group('host')
-        return self._extract_video(
-            'http://www.%s.se/video/%s?output=json' % (host, video_id),
-            video_id)
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        data = self._parse_json(
+            self._search_regex(
+                r'root\["__svtplay"\]\s*=\s*([^;]+);',
+                webpage, 'embedded data', default='{}'),
+            video_id, fatal=False)
+
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        if data:
+            video_info = try_get(
+                data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
+                dict)
+            if video_info:
+                info_dict = self._extract_video(video_info, video_id)
+                info_dict.update({
+                    'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
+                    'thumbnail': thumbnail,
+                })
+                return info_dict
+
+        video_id = self._search_regex(
+            r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
+            webpage, 'video id', default=None)
+
+        if video_id:
+            data = self._download_json(
+                'http://www.svt.se/videoplayer-api/video/%s' % video_id, video_id)
+            info_dict = self._extract_video(data, video_id)
+            if not info_dict.get('title'):
+                info_dict['title'] = re.sub(
+                    r'\s*\|\s*.+?$', '',
+                    info_dict.get('episode') or self._og_search_title(webpage))
+            return info_dict
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2016.06.20'
+__version__ = '2016.06.22'
Author	SHA1	Message	Date
Sergey M․	cf40fdf5c1	release 2016.06.22	2016-06-22 23:43:24 +07:00
Sergey M․	23bdae0955	[svt] Various improvements + [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv * [svt:base] Detect geo restriction * [svt:base] Extract series related metadata	2016-06-22 23:36:07 +07:00
Shai Coleman	ca74c90bf5	Fix issue downloading facebook videos. youtube-dl expects the format items to be returned as a list, but when there's only one item Facebook returns a dict instead; this wraps the dict in a list if necessary.	2016-06-22 12:52:15 +01:00
Sergey M․	7cfc1e2a10	[gametrailers] Remove extractor gametrailers closed (see http://www.polygon.com/2016/2/8/10944452/gametrailers-shuts-down-after-13-year-run)	2016-06-21 22:31:41 +07:00
Remita Amine	1ac5705f62	[gamespot] extract all formats	2016-06-21 13:37:57 +01:00
Yen Chi Hsuan	e4f90ea0a7	[svt] Fix extraction for SVTPlay (closes #9809 )	2016-06-21 17:55:53 +08:00
Sergey M․	cdfc187cd5	[cbs] Remove unused import	2016-06-20 22:40:33 +07:00
Sergey M․	feef925f49	[streamcloud] Capture error message (#9840 )	2016-06-20 22:40:22 +07:00