1
0
mirror of https://gitlab.com/ytdl-org/youtube-dl.git synced 2026-01-25 00:00:04 -05:00

Compare commits

...

15 Commits

Author SHA1 Message Date
Sergey M․
b4241e308e release 2016.06.25 2016-06-25 03:03:20 +07:00
Sergey M․
3d4b08dfc7 [setup.py] Add file version information and quotes consistency (Closes #9878) 2016-06-25 02:50:12 +07:00
Sergey M․
be49068d65 [youtube] Fix and skip some tests 2016-06-24 22:47:19 +07:00
Sergey M․
525cedb971 [youtube] Relax URL expansion in description 2016-06-24 22:37:13 +07:00
Sergey M․
de3c7fe0d4 [youtube] Fix 141 format tests 2016-06-24 22:27:55 +07:00
Yen Chi Hsuan
896cc72750 [mixcloud] View count and like count may be absent
Closes #9874
2016-06-24 17:26:12 +08:00
Yen Chi Hsuan
c1ff6e1ad0 [vimeo:review] Fix extraction for password-protected videos
Closes #9853
2016-06-24 16:48:37 +08:00
Remita Amine
fee70322d7 [appletrailers] correct thumbnail fallback 2016-06-23 19:03:34 +01:00
Remita Amine
8065d6c55f [dcn] extend _VALID_URL for awaan.ae and extract all available formats 2016-06-23 17:22:15 +01:00
Remita Amine
494172d2e5 [appletrailers] extract info from an alternative source if available (closes #8422) 2016-06-23 15:49:42 +01:00
Remita Amine
6e3c2047f8 [tvp] extract all formats and detect errors 2016-06-23 04:36:16 +01:00
Sergey M․
011bd3221b release 2016.06.23.1 2016-06-23 09:42:56 +07:00
Sergey M․
b46eabecd3 [jsinterp] Relax JS function regex (Closes #9863) 2016-06-23 09:41:34 +07:00
Remita Amine
0437307a41 [nbc:nbcnews] improve extraction and add msnbc to the extractor 2016-06-23 01:36:19 +01:00
Remita Amine
22b7ac13ef [tf1] fix wat id extraction(closes #9862) 2016-06-23 00:14:34 +01:00
14 changed files with 280 additions and 207 deletions

View File

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.23*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.23**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.25*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.25**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.06.23
[debug] youtube-dl version 2016.06.25
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -44,8 +44,8 @@
- **appletrailers:section**
- **archive.org**: archive.org videos
- **ARD**
- **ARD:mediathek**: Saarländischer Rundfunk
- **ARD:mediathek**
- **ARD:mediathek**: Saarländischer Rundfunk
- **arte.tv**
- **arte.tv:+7**
- **arte.tv:cinema**
@@ -385,7 +385,6 @@
- **MovieFap**
- **Moviezine**
- **MPORA**
- **MSNBC**
- **MTV**
- **mtv.de**
- **mtviggy.com**

View File

@@ -21,25 +21,37 @@ try:
import py2exe
except ImportError:
if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
print("Cannot import py2exe", file=sys.stderr)
print('Cannot import py2exe', file=sys.stderr)
exit(1)
py2exe_options = {
"bundle_files": 1,
"compressed": 1,
"optimize": 2,
"dist_dir": '.',
"dll_excludes": ['w9xpopen.exe', 'crypt32.dll'],
'bundle_files': 1,
'compressed': 1,
'optimize': 2,
'dist_dir': '.',
'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
}
# Get the version from youtube_dl/version.py without importing the package
exec(compile(open('youtube_dl/version.py').read(),
'youtube_dl/version.py', 'exec'))
DESCRIPTION = 'YouTube video downloader'
LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites'
py2exe_console = [{
"script": "./youtube_dl/__main__.py",
"dest_base": "youtube-dl",
'script': './youtube_dl/__main__.py',
'dest_base': 'youtube-dl',
'version': __version__,
'description': DESCRIPTION,
'comments': LONG_DESCRIPTION,
'product_name': 'youtube-dl',
'product_version': __version__,
}]
py2exe_params = {
'console': py2exe_console,
'options': {"py2exe": py2exe_options},
'options': {'py2exe': py2exe_options},
'zipfile': None
}
@@ -72,7 +84,7 @@ else:
params['scripts'] = ['bin/youtube-dl']
class build_lazy_extractors(Command):
description = "Build the extractor lazy loading module"
description = 'Build the extractor lazy loading module'
user_options = []
def initialize_options(self):
@@ -87,16 +99,11 @@ class build_lazy_extractors(Command):
dry_run=self.dry_run,
)
# Get the version from youtube_dl/version.py without importing the package
exec(compile(open('youtube_dl/version.py').read(),
'youtube_dl/version.py', 'exec'))
setup(
name='youtube_dl',
version=__version__,
description='YouTube video downloader',
long_description='Small command-line program to download videos from'
' YouTube.com and other video sites.',
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
url='https://github.com/rg3/youtube-dl',
author='Ricardo Garcia',
author_email='ytdl@yt-dl.org',
@@ -112,17 +119,17 @@ setup(
# test_requires = ['nosetest'],
classifiers=[
"Topic :: Multimedia :: Video",
"Development Status :: 5 - Production/Stable",
"Environment :: Console",
"License :: Public Domain",
"Programming Language :: Python :: 2.6",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.2",
"Programming Language :: Python :: 3.3",
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.5",
'Topic :: Multimedia :: Video',
'Development Status :: 5 - Production/Stable',
'Environment :: Console',
'License :: Public Domain',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.2',
'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
],
cmdclass={'build_lazy_extractors': build_lazy_extractors},

View File

@@ -7,6 +7,8 @@ from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
parse_duration,
unified_strdate,
)
@@ -16,7 +18,8 @@ class AppleTrailersIE(InfoExtractor):
_TESTS = [{
'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
'info_dict': {
'id': 'manofsteel',
'id': '5111',
'title': 'Man of Steel',
},
'playlist': [
{
@@ -70,6 +73,15 @@ class AppleTrailersIE(InfoExtractor):
'id': 'blackthorn',
},
'playlist_mincount': 2,
'expected_warnings': ['Unable to download JSON metadata'],
}, {
# json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json
'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/',
'info_dict': {
'id': '15881',
'title': 'Kung Fu Panda 3',
},
'playlist_mincount': 4,
}, {
'url': 'http://trailers.apple.com/ca/metropole/autrui/',
'only_matching': True,
@@ -85,6 +97,45 @@ class AppleTrailersIE(InfoExtractor):
movie = mobj.group('movie')
uploader_id = mobj.group('company')
webpage = self._download_webpage(url, movie)
film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
film_data = self._download_json(
'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
film_id, fatal=False)
if film_data:
entries = []
for clip in film_data.get('clips', []):
clip_title = clip['title']
formats = []
for version, version_data in clip.get('versions', {}).items():
for size, size_data in version_data.get('sizes', {}).items():
src = size_data.get('src')
if not src:
continue
formats.append({
'format_id': '%s-%s' % (version, size),
'url': re.sub(r'_(\d+p.mov)', r'_h\1', src),
'width': int_or_none(size_data.get('width')),
'height': int_or_none(size_data.get('height')),
'language': version[:2],
})
self._sort_formats(formats)
entries.append({
'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
'formats': formats,
'title': clip_title,
'thumbnail': clip.get('screen') or clip.get('thumb'),
'duration': parse_duration(clip.get('runtime') or clip.get('faded')),
'upload_date': unified_strdate(clip.get('posted')),
'uploader_id': uploader_id,
})
page_data = film_data.get('page', {})
return self.playlist_result(entries, film_id, page_data.get('movie_title'))
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
def fix_html(s):

View File

@@ -20,7 +20,7 @@ from ..utils import (
class DCNIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
def _real_extract(self, url):
show_id, video_id, season_id = re.match(self._VALID_URL, url).groups()
@@ -55,30 +55,32 @@ class DCNBaseIE(InfoExtractor):
'is_live': is_live,
}
def _extract_video_formats(self, webpage, video_id, entry_protocol):
def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol):
formats = []
m3u8_url = self._html_search_regex(
r'file\s*:\s*"([^"]+)', webpage, 'm3u8 url', fatal=False)
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=None))
rtsp_url = self._search_regex(
r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False)
if rtsp_url:
formats.append({
'url': rtsp_url,
'format_id': 'rtsp',
})
format_url_base = 'http' + self._html_search_regex(
[
r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
r'<a[^>]+href="rtsp(://[^"]+)"'
], webpage, 'format url')
# TODO: Current DASH formats are broken - $Time$ pattern in
# <SegmentTemplate> not implemented yet
# formats.extend(self._extract_mpd_formats(
# format_url_base + '/manifest.mpd',
# video_id, mpd_id='dash', fatal=False))
formats.extend(self._extract_m3u8_formats(
format_url_base + '/playlist.m3u8', video_id, 'mp4',
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
format_url_base + '/manifest.f4m',
video_id, f4m_id='hds', fatal=False))
self._sort_formats(formats)
return formats
class DCNVideoIE(DCNBaseIE):
IE_NAME = 'dcn:video'
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/[^/]+|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
'info_dict':
{
@@ -94,7 +96,10 @@ class DCNVideoIE(DCNBaseIE):
# m3u8 download
'skip_download': True,
},
}
}, {
'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -120,7 +125,7 @@ class DCNVideoIE(DCNBaseIE):
class DCNLiveIE(DCNBaseIE):
IE_NAME = 'dcn:live'
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?live/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)'
def _real_extract(self, url):
channel_id = self._match_id(url)
@@ -147,7 +152,7 @@ class DCNLiveIE(DCNBaseIE):
class DCNSeasonIE(InfoExtractor):
IE_NAME = 'dcn:season'
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
_TEST = {
'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
'info_dict':

View File

@@ -480,7 +480,6 @@ from .nbc import (
NBCNewsIE,
NBCSportsIE,
NBCSportsVPlayerIE,
MSNBCIE,
)
from .ndr import (
NDRIE,

View File

@@ -102,11 +102,11 @@ class MixcloudIE(InfoExtractor):
description = self._og_search_description(webpage)
like_count = parse_count(self._search_regex(
r'\bbutton-favorite[^>]+>.*?<span[^>]+class=["\']toggle-number[^>]+>\s*([^<]+)',
webpage, 'like count', fatal=False))
webpage, 'like count', default=None))
view_count = str_to_int(self._search_regex(
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
r'/listeners/?">([0-9,.]+)</a>'],
webpage, 'play count', fatal=False))
webpage, 'play count', default=None))
return {
'id': track_id,

View File

@@ -9,10 +9,6 @@ from ..utils import (
lowercase_escape,
smuggle_url,
unescapeHTML,
update_url_query,
int_or_none,
HEADRequest,
parse_iso8601,
)
@@ -192,9 +188,9 @@ class CSNNEIE(InfoExtractor):
class NBCNewsIE(ThePlatformIE):
_VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today)\.com/
_VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/
(?:video/.+?/(?P<id>\d+)|
([^/]+/)*(?P<display_id>[^/?]+))
([^/]+/)*(?:.*-)?(?P<mpx_id>[^/?]+))
'''
_TESTS = [
@@ -216,13 +212,16 @@ class NBCNewsIE(ThePlatformIE):
'ext': 'mp4',
'title': 'How Twitter Reacted To The Snowden Interview',
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
'uploader': 'NBCU-NEWS',
'timestamp': 1401363060,
'upload_date': '20140529',
},
},
{
'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
'md5': 'fdbf39ab73a72df5896b6234ff98518a',
'info_dict': {
'id': 'Wjf9EDR3A_60',
'id': '529953347624',
'ext': 'mp4',
'title': 'FULL EPISODE: Family Business',
'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
@@ -237,6 +236,9 @@ class NBCNewsIE(ThePlatformIE):
'ext': 'mp4',
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
'timestamp': 1423104900,
'uploader': 'NBCU-NEWS',
'upload_date': '20150205',
},
},
{
@@ -245,10 +247,12 @@ class NBCNewsIE(ThePlatformIE):
'info_dict': {
'id': '529953347624',
'ext': 'mp4',
'title': 'Volkswagen U.S. Chief: We \'Totally Screwed Up\'',
'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up',
'description': 'md5:c8be487b2d80ff0594c005add88d8351',
'upload_date': '20150922',
'timestamp': 1442917800,
'uploader': 'NBCU-NEWS',
},
'expected_warnings': ['http-6000 is not available']
},
{
'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
@@ -260,6 +264,22 @@ class NBCNewsIE(ThePlatformIE):
'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
'upload_date': '20160420',
'timestamp': 1461152093,
'uploader': 'NBCU-NEWS',
},
},
{
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
'info_dict': {
'id': '314487875924',
'ext': 'mp4',
'title': 'The chaotic GOP immigration vote',
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
'thumbnail': 're:^https?://.*\.jpg$',
'timestamp': 1406937606,
'upload_date': '20140802',
'uploader': 'NBCU-NEWS',
'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
},
},
{
@@ -290,105 +310,28 @@ class NBCNewsIE(ThePlatformIE):
}
else:
# "feature" and "nightly-news" pages use theplatform.com
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
info = None
bootstrap_json = self._search_regex(
[r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'],
webpage, 'bootstrap json', default=None)
bootstrap = self._parse_json(
bootstrap_json, display_id, transform_source=unescapeHTML)
if 'results' in bootstrap:
info = bootstrap['results'][0]['video']
elif 'video' in bootstrap:
info = bootstrap['video']
else:
info = bootstrap
video_id = info['mpxId']
title = info['title']
subtitles = {}
caption_links = info.get('captionLinks')
if caption_links:
for (sub_key, sub_ext) in (('smpte-tt', 'ttml'), ('web-vtt', 'vtt'), ('srt', 'srt')):
sub_url = caption_links.get(sub_key)
if sub_url:
subtitles.setdefault('en', []).append({
'url': sub_url,
'ext': sub_ext,
})
formats = []
for video_asset in info['videoAssets']:
video_url = video_asset.get('publicUrl')
if not video_url:
continue
container = video_asset.get('format')
asset_type = video_asset.get('assetType') or ''
if container == 'ISM' or asset_type == 'FireTV-Once':
continue
elif asset_type == 'OnceURL':
tp_formats, tp_subtitles = self._extract_theplatform_smil(
video_url, video_id)
formats.extend(tp_formats)
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
video_id = mobj.group('mpx_id')
if not video_id.isdigit():
webpage = self._download_webpage(url, video_id)
info = None
bootstrap_json = self._search_regex(
[r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'],
webpage, 'bootstrap json', default=None)
bootstrap = self._parse_json(
bootstrap_json, video_id, transform_source=unescapeHTML)
if 'results' in bootstrap:
info = bootstrap['results'][0]['video']
elif 'video' in bootstrap:
info = bootstrap['video']
else:
tbr = int_or_none(video_asset.get('bitRate') or video_asset.get('bitrate'), 1000)
format_id = 'http%s' % ('-%d' % tbr if tbr else '')
video_url = update_url_query(
video_url, {'format': 'redirect'})
# resolve the url so that we can check availability and detect the correct extension
head = self._request_webpage(
HEADRequest(video_url), video_id,
'Checking %s url' % format_id,
'%s is not available' % format_id,
fatal=False)
if head:
video_url = head.geturl()
formats.append({
'format_id': format_id,
'url': video_url,
'width': int_or_none(video_asset.get('width')),
'height': int_or_none(video_asset.get('height')),
'tbr': tbr,
'container': video_asset.get('format'),
})
self._sort_formats(formats)
info = bootstrap
video_id = info['mpxId']
return {
'_type': 'url_transparent',
'id': video_id,
'title': title,
'description': info.get('description'),
'thumbnail': info.get('thumbnail'),
'duration': int_or_none(info.get('duration')),
'timestamp': parse_iso8601(info.get('pubDate') or info.get('pub_date')),
'formats': formats,
'subtitles': subtitles,
# http://feed.theplatform.com/f/2E2eJC/nbcnews also works
'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byId=%s' % video_id,
'ie_key': 'ThePlatformFeed',
}
class MSNBCIE(InfoExtractor):
# https URLs redirect to corresponding http ones
_VALID_URL = r'https?://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)'
_TEST = {
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
'info_dict': {
'id': 'n_hayes_Aimm_140801_272214',
'ext': 'mp4',
'title': 'The chaotic GOP immigration vote',
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
'thumbnail': 're:^https?://.*\.jpg$',
'timestamp': 1406937606,
'upload_date': '20140802',
'uploader': 'NBCU-NEWS',
'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
embed_url = self._html_search_meta('embedURL', webpage)
return self.url_result(embed_url)

View File

@@ -48,6 +48,6 @@ class TF1IE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
wat_id = self._html_search_regex(
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8}).*?\1',
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})\1',
webpage, 'wat id', group='id')
return self.url_result('wat:%s' % wat_id, 'Wat')

View File

@@ -4,6 +4,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
clean_html,
get_element_by_attribute,
ExtractorError,
)
class TVPIE(InfoExtractor):
@@ -21,7 +27,7 @@ class TVPIE(InfoExtractor):
},
}, {
'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
'md5': 'c3b15ed1af288131115ff17a17c19dda',
'md5': 'b0005b542e5b4de643a9690326ab1257',
'info_dict': {
'id': '17916176',
'ext': 'mp4',
@@ -53,6 +59,11 @@ class TVPIE(InfoExtractor):
webpage = self._download_webpage(
'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id)
error_massage = get_element_by_attribute('class', 'msg error', webpage)
if error_massage:
raise ExtractorError('%s said: %s' % (
self.IE_NAME, clean_html(error_massage)), expected=True)
title = self._search_regex(
r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1',
webpage, 'title', group='title')
@@ -66,24 +77,50 @@ class TVPIE(InfoExtractor):
r"poster\s*:\s*'([^']+)'", webpage, 'thumbnail', default=None)
video_url = self._search_regex(
r'0:{src:([\'"])(?P<url>.*?)\1', webpage, 'formats', group='url', default=None)
if not video_url:
r'0:{src:([\'"])(?P<url>.*?)\1', webpage,
'formats', group='url', default=None)
if not video_url or 'material_niedostepny.mp4' in video_url:
video_url = self._download_json(
'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id,
video_id)['video_url']
ext = video_url.rsplit('.', 1)[-1]
if ext != 'ism/manifest':
if '/' in ext:
ext = 'mp4'
formats = []
video_url_base = self._search_regex(
r'(https?://.+?/video)(?:\.(?:ism|f4m|m3u8)|-\d+\.mp4)',
video_url, 'video base url', default=None)
if video_url_base:
# TODO: Current DASH formats are broken - $Time$ pattern in
# <SegmentTemplate> not implemented yet
# formats.extend(self._extract_mpd_formats(
# video_url_base + '.ism/video.mpd',
# video_id, mpd_id='dash', fatal=False))
formats.extend(self._extract_f4m_formats(
video_url_base + '.ism/video.f4m',
video_id, f4m_id='hds', fatal=False))
m3u8_formats = self._extract_m3u8_formats(
video_url_base + '.ism/video.m3u8', video_id,
'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
self._sort_formats(m3u8_formats)
m3u8_formats = list(filter(
lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
m3u8_formats))
formats.extend(m3u8_formats)
for i, m3u8_format in enumerate(m3u8_formats, 2):
http_url = '%s-%d.mp4' % (video_url_base, i)
if self._is_valid_url(http_url, video_id):
f = m3u8_format.copy()
f.update({
'url': http_url,
'format_id': f['format_id'].replace('hls', 'http'),
'protocol': 'http',
})
formats.append(f)
else:
formats = [{
'format_id': 'direct',
'url': video_url,
'ext': ext,
'ext': determine_ext(video_url, 'mp4'),
}]
else:
m3u8_url = re.sub('([^/]*)\.ism/manifest', r'\1.ism/\1.m3u8', video_url)
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
self._sort_formats(formats)

View File

@@ -16,6 +16,7 @@ from ..utils import (
ExtractorError,
InAdvancePagedList,
int_or_none,
NO_DEFAULT,
RegexNotFoundError,
sanitized_Request,
smuggle_url,
@@ -56,6 +57,26 @@ class VimeoBaseInfoExtractor(InfoExtractor):
self._set_vimeo_cookie('vuid', vuid)
self._download_webpage(login_request, None, False, 'Wrong login info')
# Submit the user-supplied password for a password-protected video page.
#
# url: page URL of the video; '/password' is appended to it to build the
#      request target.
# video_id: identifier passed through to _download_webpage.
# webpage: already-downloaded page content from which the xsrf token and
#          vuid are extracted.
#
# Returns whatever _download_webpage returns for the password request.
# Raises an expected ExtractorError when no 'videopassword' param was given.
def _verify_video_password(self, url, video_id, webpage):
password = self._downloader.params.get('videopassword')
if password is None:
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
# Both values come from the page itself; the token goes into the form data,
# the vuid is set as a cookie further below.
token, vuid = self._extract_xsrft_and_vuid(webpage)
data = urlencode_postdata({
'password': password,
'token': token,
})
if url.startswith('http://'):
# vimeo only supports https now, but the user can give an http url
url = url.replace('http://', 'https://')
# The request carries form-encoded data and uses the (https) page URL as
# Referer. NOTE(review): presumably sent as a POST because data is
# supplied - confirm against sanitized_Request.
password_request = sanitized_Request(url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Referer', url)
self._set_vimeo_cookie('vuid', vuid)
return self._download_webpage(
password_request, video_id,
'Verifying the password', 'Wrong password')
def _extract_xsrft_and_vuid(self, webpage):
xsrft = self._search_regex(
r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
@@ -344,26 +365,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
if mobj:
return mobj.group(1)
def _verify_video_password(self, url, video_id, webpage):
password = self._downloader.params.get('videopassword')
if password is None:
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
token, vuid = self._extract_xsrft_and_vuid(webpage)
data = urlencode_postdata({
'password': password,
'token': token,
})
if url.startswith('http://'):
# vimeo only supports https now, but the user can give an http url
url = url.replace('http://', 'https://')
password_request = sanitized_Request(url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Referer', url)
self._set_vimeo_cookie('vuid', vuid)
return self._download_webpage(
password_request, video_id,
'Verifying the password', 'Wrong password')
def _verify_player_video_password(self, url, video_id):
password = self._downloader.params.get('videopassword')
if password is None:
@@ -791,12 +792,39 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
'thumbnail': 're:^https?://.*\.jpg$',
'uploader_id': 'user22258446',
}
}, {
'note': 'Password protected',
'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde',
'info_dict': {
'id': '138823582',
'ext': 'mp4',
'title': 'EFFICIENT PICKUP MASTERCLASS MODULE 1',
'uploader': 'TMB',
'uploader_id': 'user37284429',
},
'params': {
'videopassword': 'holygrail',
},
}]
# Initialization hook for this extractor: it only delegates to self._login().
def _real_initialize(self):
self._login()
# Return the config URL taken from the data-config-url attribute of the page.
#
# webpage_url: URL of the page to download and search.
# video_id: identifier passed through to the download helpers.
# video_password_verified: False on the first attempt; True on the single
#     retry performed after the password has been submitted.
#
# If the attribute is missing on the first attempt, the video password is
# verified via _verify_video_password and the lookup is repeated once.
def _get_config_url(self, webpage_url, video_id, video_password_verified=False):
webpage = self._download_webpage(webpage_url, video_id)
# First attempt: a missing match yields None (default=None). Retry: the
# default is NO_DEFAULT instead.
# NOTE(review): presumably NO_DEFAULT makes a missing match an error, which
# would keep the recursion from repeating - confirm in _html_search_regex.
config_url = self._html_search_regex(
r'data-config-url="([^"]+)"', webpage, 'config URL',
default=NO_DEFAULT if video_password_verified else None)
if config_url is None:
# The return value of the password request is not used here; the page is
# downloaded again by the recursive call.
self._verify_video_password(webpage_url, video_id, webpage)
config_url = self._get_config_url(
webpage_url, video_id, video_password_verified=True)
return config_url
def _real_extract(self, url):
video_id = self._match_id(url)
config = self._download_json(
'https://player.vimeo.com/video/%s/config' % video_id, video_id)
config_url = self._get_config_url(url, video_id)
config = self._download_json(config_url, video_id)
info_dict = self._parse_config(config, video_id)
self._vimeo_sort_formats(info_dict['formats'])
info_dict['id'] = video_id

View File

@@ -501,6 +501,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'youtube_include_dash_manifest': True,
'format': '141',
},
'skip': 'format 141 not served anymore',
},
# DASH manifest with encrypted signature
{
@@ -517,7 +518,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
},
'params': {
'youtube_include_dash_manifest': True,
'format': '141',
'format': '141/bestaudio[ext=m4a]',
},
},
# JS player signature function name containing $
@@ -537,7 +538,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
},
'params': {
'youtube_include_dash_manifest': True,
'format': '141',
'format': '141/bestaudio[ext=m4a]',
},
},
# Controversy video
@@ -618,7 +619,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic',
'license': 'Standard YouTube License',
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
'uploader': 'Olympics',
'uploader': 'Olympic',
'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
},
'params': {
@@ -671,7 +672,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
'uploader': 'dorappi2000',
'license': 'Standard YouTube License',
'formats': 'mincount:33',
'formats': 'mincount:32',
},
},
# DASH manifest with segment_list
@@ -691,7 +692,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'params': {
'youtube_include_dash_manifest': True,
'format': '135', # bestvideo
}
},
'skip': 'This live event has ended.',
},
{
# Multifeed videos (multiple cameras), URL is for Main Camera
@@ -762,6 +764,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
},
'playlist_count': 2,
'skip': 'Not multifeed anymore',
},
{
'url': 'http://vid.plus/FlRa-iH7PGw',
@@ -814,6 +817,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'params': {
'skip_download': True,
},
'skip': 'This video does not exist.',
},
{
# Video licensed under Creative Commons
@@ -1331,7 +1335,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(?:[a-zA-Z-]+="[^"]*"\s+)*?
(?:title|href)="([^"]+)"\s+
(?:[a-zA-Z-]+="[^"]*"\s+)*?
class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*>
class="[^"]*"[^>]*>
[^<]+\.{3}\s*
</a>
''', r'\1', video_description)

View File

@@ -232,7 +232,7 @@ class JSInterpreter(object):
def extract_function(self, funcname):
func_m = re.search(
r'''(?x)
(?:function\s+%s|[{;,]%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
(?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
\((?P<args>[^)]*)\)\s*
\{(?P<code>[^}]+)\}''' % (
re.escape(funcname), re.escape(funcname), re.escape(funcname)),

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2016.06.23'
__version__ = '2016.06.25'