release 2016.07.28

[twitch:clips] Fix extraction (Closes #9767)
[extractor/generic] Add test for #10179
2026-01-25 00:00:04 -05:00 · 2016-07-28 02:42:57 +07:00 · 2016-07-28 22:30:09 +07:00 · 2016-07-28 22:20:08 +07:00 · 2016-07-28 22:16:05 +07:00 · 2016-07-28 22:15:15 +07:00
10 changed files with 78 additions and 42 deletions
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -6,8 +6,8 @@

 ---

-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.24**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.28**

 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2016.07.24
+[debug] youtube-dl version 2016.07.28
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
--- a/devscripts/show-downloads-statistics.py
+++ b/devscripts/show-downloads-statistics.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 from __future__ import unicode_literals

+import itertools
 import json
 import os
 import re
@@ -21,21 +22,26 @@ def format_size(bytes):

 total_bytes = 0

-releases = json.loads(compat_urllib_request.urlopen(
-    'https://api.github.com/repos/rg3/youtube-dl/releases').read().decode('utf-8'))
+for page in itertools.count(1):
+    releases = json.loads(compat_urllib_request.urlopen(
+        'https://api.github.com/repos/rg3/youtube-dl/releases?page=%s' % page
+    ).read().decode('utf-8'))

-for release in releases:
-    compat_print(release['name'])
-    for asset in release['assets']:
-        asset_name = asset['name']
-        total_bytes += asset['download_count'] * asset['size']
-        if all(not re.match(p, asset_name) for p in (
-                r'^youtube-dl$',
-                r'^youtube-dl-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$',
-                r'^youtube-dl\.exe$')):
-            continue
-        compat_print(
-            ' %s size: %s downloads: %d'
-            % (asset_name, format_size(asset['size']), asset['download_count']))
+    if not releases:
+        break
+
+    for release in releases:
+        compat_print(release['name'])
+        for asset in release['assets']:
+            asset_name = asset['name']
+            total_bytes += asset['download_count'] * asset['size']
+            if all(not re.match(p, asset_name) for p in (
+                    r'^youtube-dl$',
+                    r'^youtube-dl-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$',
+                    r'^youtube-dl\.exe$')):
+                continue
+            compat_print(
+                ' %s size: %s downloads: %d'
+                % (asset_name, format_size(asset['size']), asset['download_count']))

 compat_print('total downloads traffic: %s' % format_size(total_bytes))
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -142,7 +142,6 @@
 - **CollegeRama**
 - **ComCarCoff**
 - **ComedyCentral**
- - **ComedyCentralShows**: The Daily Show / The Colbert Report
 - **ComedyCentralTV**
 - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
 - **Coub**
@@ -401,7 +400,6 @@
 - **MSN**
 - **MTV**
 - **mtv.de**
- - **mtviggy.com**
 - **mtvservices:embedded**
 - **MuenchenTV**: münchen.tv
 - **MusicPlayOn**
@@ -441,7 +439,6 @@
 - **Newstube**
 - **NextMedia**: 蘋果日報
 - **NextMediaActionNews**: 蘋果日報 - 動新聞
- - **nextmovie.com**
 - **nfb**: National Film Board of Canada
 - **nfl.com**
 - **nhl.com**
@@ -699,6 +696,7 @@
 - **TNAFlix**
 - **TNAFlixNetworkEmbed**
 - **toggle**
+ - **Tosh**: Tosh.0
 - **tou.tv**
 - **Toypics**: Toypics user profile
 - **ToypicsUser**: Toypics user profile
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -73,6 +73,7 @@ class ARDMediathekIE(InfoExtractor):
            'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb',
            'duration': 3287,
        },
+        'skip': 'Video is no longer available',
    }]

    def _extract_media_info(self, media_info_url, webpage, video_id):
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1786,7 +1786,7 @@ class InfoExtractor(object):

        any_restricted = False
        for tc in self.get_testcases(include_onlymatching=False):
-            if 'playlist' in tc:
+            if tc.get('playlist', []):
                tc = tc['playlist'][0]
            is_restricted = age_restricted(
                tc.get('info_dict', {}).get('age_limit'), age_limit)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -71,6 +71,7 @@ from .vessel import VesselIE
 from .kaltura import KalturaIE
 from .eagleplatform import EaglePlatformIE
 from .facebook import FacebookIE
+from .soundcloud import SoundcloudIE


 class GenericIE(InfoExtractor):
@@ -784,6 +785,15 @@ class GenericIE(InfoExtractor):
                'upload_date': '20141029',
            }
        },
+        # Soundcloud multiple embeds
+        {
+            'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
+            'info_dict': {
+                'id': '52809',
+                'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance  | TAB + AUDIO',
+            },
+            'playlist_mincount': 7,
+        },
        # Livestream embed
        {
            'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
@@ -1999,12 +2009,9 @@ class GenericIE(InfoExtractor):
            return self.url_result(myvi_url)

        # Look for embedded soundcloud player
-        mobj = re.search(
-            r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
-            webpage)
-        if mobj is not None:
-            url = unescapeHTML(mobj.group('url'))
-            return self.url_result(url)
+        soundcloud_urls = SoundcloudIE._extract_urls(webpage)
+        if soundcloud_urls:
+            return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())

        # Look for embedded mtvservices player
        mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
--- a/youtube_dl/extractor/shared.py
+++ b/youtube_dl/extractor/shared.py
@@ -6,7 +6,6 @@ from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    int_or_none,
-    sanitized_Request,
    urlencode_postdata,
 )

@@ -37,28 +36,33 @@ class SharedIE(InfoExtractor):

    def _real_extract(self, url):
        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+
+        webpage, urlh = self._download_webpage_handle(url, video_id)

        if '>File does not exist<' in webpage:
            raise ExtractorError(
                'Video %s does not exist' % video_id, expected=True)

        download_form = self._hidden_inputs(webpage)
-        request = sanitized_Request(
-            url, urlencode_postdata(download_form))
-        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

        video_page = self._download_webpage(
-            request, video_id, 'Downloading video page')
+            urlh.geturl(), video_id, 'Downloading video page',
+            data=urlencode_postdata(download_form),
+            headers={
+                'Content-Type': 'application/x-www-form-urlencoded',
+                'Referer': urlh.geturl(),
+            })

        video_url = self._html_search_regex(
-            r'data-url="([^"]+)"', video_page, 'video URL')
+            r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+            video_page, 'video URL', group='url')
        title = base64.b64decode(self._html_search_meta(
            'full:title', webpage, 'title').encode('utf-8')).decode('utf-8')
        filesize = int_or_none(self._html_search_meta(
            'full:size', webpage, 'file size', fatal=False))
        thumbnail = self._html_search_regex(
-            r'data-poster="([^"]+)"', video_page, 'thumbnail', default=None)
+            r'data-poster=(["\'])(?P<url>(?:(?!\1).)+)\1',
+            video_page, 'thumbnail', default=None, group='url')

        return {
            'id': video_id,
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -119,6 +119,12 @@ class SoundcloudIE(InfoExtractor):
    _CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'
    _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'

+    @staticmethod
+    def _extract_urls(webpage):
+        return [m.group('url') for m in re.finditer(
+            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
+            webpage)]
+
    def report_resolve(self, video_id):
        """Report information extraction."""
        self.to_screen('%s: Resolving id' % video_id)
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -461,7 +461,7 @@ class TwitchClipsIE(InfoExtractor):
    IE_NAME = 'twitch:clips'
    _VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'

-    _TEST = {
+    _TESTS = [{
        'url': 'https://clips.twitch.tv/ea/AggressiveCobraPoooound',
        'md5': '761769e1eafce0ffebfb4089cb3847cd',
        'info_dict': {
@@ -473,7 +473,11 @@ class TwitchClipsIE(InfoExtractor):
            'uploader': 'stereotype_',
            'uploader_id': 'stereotype_',
        },
-    }
+    }, {
+        # multiple formats
+        'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
+        'only_matching': True,
+    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
@@ -485,15 +489,25 @@ class TwitchClipsIE(InfoExtractor):
                r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'),
            video_id, transform_source=js_to_json)

-        video_url = clip['clip_video_url']
-        title = clip['channel_title']
+        title = clip.get('channel_title') or self._og_search_title(webpage)
+
+        formats = [{
+            'url': option['source'],
+            'format_id': option.get('quality'),
+            'height': int_or_none(option.get('quality')),
+        } for option in clip.get('quality_options', []) if option.get('source')]
+
+        if not formats:
+            formats = [{
+                'url': clip['clip_video_url'],
+            }]

        return {
            'id': video_id,
-            'url': video_url,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage),
            'creator': clip.get('broadcaster_display_name') or clip.get('broadcaster_login'),
            'uploader': clip.get('curator_login'),
            'uploader_id': clip.get('curator_display_name'),
+            'formats': formats,
        }
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2016.07.24'
+__version__ = '2016.07.28'
Author	SHA1	Message	Date
Sergey M․	d9d56deadf	release 2016.07.28	2016-07-28 02:42:57 +07:00
Sergey M․	74ba450a81	[twitch:clips] Fix extraction (Closes #9767)	2016-07-28 22:30:09 +07:00
Sergey M․	db19df6ca0	[extractor/generic] Add test for #10179	2016-07-28 22:20:08 +07:00
Sergey M․	fbdf8d15d1	[soundcloud] Add _extract_urls (#10179)	2016-07-28 22:16:05 +07:00
Sergey M․	94aae01548	[extractor/generic] Extract all soundcloud embeds (Closes #10179)	2016-07-28 22:15:15 +07:00
Sergey M․	39eef54cf0	[ard:mediathek] Skip unavailable test	2016-07-28 21:38:23 +07:00
Sergey M․	05c8268c81	[shared] Modernize and make more robust	2016-07-27 23:39:02 +07:00
Sergey M․	289a16b4f3	[shared] Respect redirect URL (Closes #10170)	2016-07-27 23:28:01 +07:00
Sergey M․	7935926baa	[devscripts/show-downloads-statistics] Add support for paging	2016-07-27 00:14:40 +07:00
Sergey M․	dcbb07c35a	release 2016.07.26.2	2016-07-26 23:56:53 +07:00
Sergey M․	40090e8d51	[extractor/common] Improve is_suitable in order to fix breakage introduced by `a3aa814b77`	2016-07-26 23:54:06 +07:00