mirror of
https://gitlab.com/ytdl-org/youtube-dl.git
synced 2026-01-25 00:00:04 -05:00
Compare commits
15 Commits
2016.06.23
...
2016.06.25
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b4241e308e | ||
|
|
3d4b08dfc7 | ||
|
|
be49068d65 | ||
|
|
525cedb971 | ||
|
|
de3c7fe0d4 | ||
|
|
896cc72750 | ||
|
|
c1ff6e1ad0 | ||
|
|
fee70322d7 | ||
|
|
8065d6c55f | ||
|
|
494172d2e5 | ||
|
|
6e3c2047f8 | ||
|
|
011bd3221b | ||
|
|
b46eabecd3 | ||
|
|
0437307a41 | ||
|
|
22b7ac13ef |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.23*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.23**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.25*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.25**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.06.23
|
||||
[debug] youtube-dl version 2016.06.25
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
@@ -44,8 +44,8 @@
|
||||
- **appletrailers:section**
|
||||
- **archive.org**: archive.org videos
|
||||
- **ARD**
|
||||
- **ARD:mediathek**: Saarländischer Rundfunk
|
||||
- **ARD:mediathek**
|
||||
- **ARD:mediathek**: Saarländischer Rundfunk
|
||||
- **arte.tv**
|
||||
- **arte.tv:+7**
|
||||
- **arte.tv:cinema**
|
||||
@@ -385,7 +385,6 @@
|
||||
- **MovieFap**
|
||||
- **Moviezine**
|
||||
- **MPORA**
|
||||
- **MSNBC**
|
||||
- **MTV**
|
||||
- **mtv.de**
|
||||
- **mtviggy.com**
|
||||
|
||||
63
setup.py
63
setup.py
@@ -21,25 +21,37 @@ try:
|
||||
import py2exe
|
||||
except ImportError:
|
||||
if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
|
||||
print("Cannot import py2exe", file=sys.stderr)
|
||||
print('Cannot import py2exe', file=sys.stderr)
|
||||
exit(1)
|
||||
|
||||
py2exe_options = {
|
||||
"bundle_files": 1,
|
||||
"compressed": 1,
|
||||
"optimize": 2,
|
||||
"dist_dir": '.',
|
||||
"dll_excludes": ['w9xpopen.exe', 'crypt32.dll'],
|
||||
'bundle_files': 1,
|
||||
'compressed': 1,
|
||||
'optimize': 2,
|
||||
'dist_dir': '.',
|
||||
'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
|
||||
}
|
||||
|
||||
# Get the version from youtube_dl/version.py without importing the package
|
||||
exec(compile(open('youtube_dl/version.py').read(),
|
||||
'youtube_dl/version.py', 'exec'))
|
||||
|
||||
DESCRIPTION = 'YouTube video downloader'
|
||||
LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites'
|
||||
|
||||
py2exe_console = [{
|
||||
"script": "./youtube_dl/__main__.py",
|
||||
"dest_base": "youtube-dl",
|
||||
'script': './youtube_dl/__main__.py',
|
||||
'dest_base': 'youtube-dl',
|
||||
'version': __version__,
|
||||
'description': DESCRIPTION,
|
||||
'comments': LONG_DESCRIPTION,
|
||||
'product_name': 'youtube-dl',
|
||||
'product_version': __version__,
|
||||
}]
|
||||
|
||||
py2exe_params = {
|
||||
'console': py2exe_console,
|
||||
'options': {"py2exe": py2exe_options},
|
||||
'options': {'py2exe': py2exe_options},
|
||||
'zipfile': None
|
||||
}
|
||||
|
||||
@@ -72,7 +84,7 @@ else:
|
||||
params['scripts'] = ['bin/youtube-dl']
|
||||
|
||||
class build_lazy_extractors(Command):
|
||||
description = "Build the extractor lazy loading module"
|
||||
description = 'Build the extractor lazy loading module'
|
||||
user_options = []
|
||||
|
||||
def initialize_options(self):
|
||||
@@ -87,16 +99,11 @@ class build_lazy_extractors(Command):
|
||||
dry_run=self.dry_run,
|
||||
)
|
||||
|
||||
# Get the version from youtube_dl/version.py without importing the package
|
||||
exec(compile(open('youtube_dl/version.py').read(),
|
||||
'youtube_dl/version.py', 'exec'))
|
||||
|
||||
setup(
|
||||
name='youtube_dl',
|
||||
version=__version__,
|
||||
description='YouTube video downloader',
|
||||
long_description='Small command-line program to download videos from'
|
||||
' YouTube.com and other video sites.',
|
||||
description=DESCRIPTION,
|
||||
long_description=LONG_DESCRIPTION,
|
||||
url='https://github.com/rg3/youtube-dl',
|
||||
author='Ricardo Garcia',
|
||||
author_email='ytdl@yt-dl.org',
|
||||
@@ -112,17 +119,17 @@ setup(
|
||||
# test_requires = ['nosetest'],
|
||||
|
||||
classifiers=[
|
||||
"Topic :: Multimedia :: Video",
|
||||
"Development Status :: 5 - Production/Stable",
|
||||
"Environment :: Console",
|
||||
"License :: Public Domain",
|
||||
"Programming Language :: Python :: 2.6",
|
||||
"Programming Language :: Python :: 2.7",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.2",
|
||||
"Programming Language :: Python :: 3.3",
|
||||
"Programming Language :: Python :: 3.4",
|
||||
"Programming Language :: Python :: 3.5",
|
||||
'Topic :: Multimedia :: Video',
|
||||
'Development Status :: 5 - Production/Stable',
|
||||
'Environment :: Console',
|
||||
'License :: Public Domain',
|
||||
'Programming Language :: Python :: 2.6',
|
||||
'Programming Language :: Python :: 2.7',
|
||||
'Programming Language :: Python :: 3',
|
||||
'Programming Language :: Python :: 3.2',
|
||||
'Programming Language :: Python :: 3.3',
|
||||
'Programming Language :: Python :: 3.4',
|
||||
'Programming Language :: Python :: 3.5',
|
||||
],
|
||||
|
||||
cmdclass={'build_lazy_extractors': build_lazy_extractors},
|
||||
|
||||
@@ -7,6 +7,8 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
@@ -16,7 +18,8 @@ class AppleTrailersIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
|
||||
'info_dict': {
|
||||
'id': 'manofsteel',
|
||||
'id': '5111',
|
||||
'title': 'Man of Steel',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
@@ -70,6 +73,15 @@ class AppleTrailersIE(InfoExtractor):
|
||||
'id': 'blackthorn',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
'expected_warnings': ['Unable to download JSON metadata'],
|
||||
}, {
|
||||
# json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json
|
||||
'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/',
|
||||
'info_dict': {
|
||||
'id': '15881',
|
||||
'title': 'Kung Fu Panda 3',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}, {
|
||||
'url': 'http://trailers.apple.com/ca/metropole/autrui/',
|
||||
'only_matching': True,
|
||||
@@ -85,6 +97,45 @@ class AppleTrailersIE(InfoExtractor):
|
||||
movie = mobj.group('movie')
|
||||
uploader_id = mobj.group('company')
|
||||
|
||||
webpage = self._download_webpage(url, movie)
|
||||
film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
|
||||
film_data = self._download_json(
|
||||
'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
|
||||
film_id, fatal=False)
|
||||
|
||||
if film_data:
|
||||
entries = []
|
||||
for clip in film_data.get('clips', []):
|
||||
clip_title = clip['title']
|
||||
|
||||
formats = []
|
||||
for version, version_data in clip.get('versions', {}).items():
|
||||
for size, size_data in version_data.get('sizes', {}).items():
|
||||
src = size_data.get('src')
|
||||
if not src:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': '%s-%s' % (version, size),
|
||||
'url': re.sub(r'_(\d+p.mov)', r'_h\1', src),
|
||||
'width': int_or_none(size_data.get('width')),
|
||||
'height': int_or_none(size_data.get('height')),
|
||||
'language': version[:2],
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
|
||||
'formats': formats,
|
||||
'title': clip_title,
|
||||
'thumbnail': clip.get('screen') or clip.get('thumb'),
|
||||
'duration': parse_duration(clip.get('runtime') or clip.get('faded')),
|
||||
'upload_date': unified_strdate(clip.get('posted')),
|
||||
'uploader_id': uploader_id,
|
||||
})
|
||||
|
||||
page_data = film_data.get('page', {})
|
||||
return self.playlist_result(entries, film_id, page_data.get('movie_title'))
|
||||
|
||||
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
|
||||
|
||||
def fix_html(s):
|
||||
|
||||
@@ -20,7 +20,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class DCNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id, video_id, season_id = re.match(self._VALID_URL, url).groups()
|
||||
@@ -55,30 +55,32 @@ class DCNBaseIE(InfoExtractor):
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
def _extract_video_formats(self, webpage, video_id, entry_protocol):
|
||||
def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol):
|
||||
formats = []
|
||||
m3u8_url = self._html_search_regex(
|
||||
r'file\s*:\s*"([^"]+)', webpage, 'm3u8 url', fatal=False)
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=None))
|
||||
|
||||
rtsp_url = self._search_regex(
|
||||
r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False)
|
||||
if rtsp_url:
|
||||
formats.append({
|
||||
'url': rtsp_url,
|
||||
'format_id': 'rtsp',
|
||||
})
|
||||
|
||||
format_url_base = 'http' + self._html_search_regex(
|
||||
[
|
||||
r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
|
||||
r'<a[^>]+href="rtsp(://[^"]+)"'
|
||||
], webpage, 'format url')
|
||||
# TODO: Current DASH formats are broken - $Time$ pattern in
|
||||
# <SegmentTemplate> not implemented yet
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# format_url_base + '/manifest.mpd',
|
||||
# video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url_base + '/playlist.m3u8', video_id, 'mp4',
|
||||
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url_base + '/manifest.f4m',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
|
||||
class DCNVideoIE(DCNBaseIE):
|
||||
IE_NAME = 'dcn:video'
|
||||
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/[^/]+|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
|
||||
'info_dict':
|
||||
{
|
||||
@@ -94,7 +96,10 @@ class DCNVideoIE(DCNBaseIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -120,7 +125,7 @@ class DCNVideoIE(DCNBaseIE):
|
||||
|
||||
class DCNLiveIE(DCNBaseIE):
|
||||
IE_NAME = 'dcn:live'
|
||||
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?live/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
@@ -147,7 +152,7 @@ class DCNLiveIE(DCNBaseIE):
|
||||
|
||||
class DCNSeasonIE(InfoExtractor):
|
||||
IE_NAME = 'dcn:season'
|
||||
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
|
||||
_TEST = {
|
||||
'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
|
||||
'info_dict':
|
||||
|
||||
@@ -480,7 +480,6 @@ from .nbc import (
|
||||
NBCNewsIE,
|
||||
NBCSportsIE,
|
||||
NBCSportsVPlayerIE,
|
||||
MSNBCIE,
|
||||
)
|
||||
from .ndr import (
|
||||
NDRIE,
|
||||
|
||||
@@ -102,11 +102,11 @@ class MixcloudIE(InfoExtractor):
|
||||
description = self._og_search_description(webpage)
|
||||
like_count = parse_count(self._search_regex(
|
||||
r'\bbutton-favorite[^>]+>.*?<span[^>]+class=["\']toggle-number[^>]+>\s*([^<]+)',
|
||||
webpage, 'like count', fatal=False))
|
||||
webpage, 'like count', default=None))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
||||
r'/listeners/?">([0-9,.]+)</a>'],
|
||||
webpage, 'play count', fatal=False))
|
||||
webpage, 'play count', default=None))
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
|
||||
@@ -9,10 +9,6 @@ from ..utils import (
|
||||
lowercase_escape,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
HEADRequest,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
@@ -192,9 +188,9 @@ class CSNNEIE(InfoExtractor):
|
||||
|
||||
|
||||
class NBCNewsIE(ThePlatformIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today)\.com/
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/
|
||||
(?:video/.+?/(?P<id>\d+)|
|
||||
([^/]+/)*(?P<display_id>[^/?]+))
|
||||
([^/]+/)*(?:.*-)?(?P<mpx_id>[^/?]+))
|
||||
'''
|
||||
|
||||
_TESTS = [
|
||||
@@ -216,13 +212,16 @@ class NBCNewsIE(ThePlatformIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'How Twitter Reacted To The Snowden Interview',
|
||||
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
|
||||
'uploader': 'NBCU-NEWS',
|
||||
'timestamp': 1401363060,
|
||||
'upload_date': '20140529',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
|
||||
'md5': 'fdbf39ab73a72df5896b6234ff98518a',
|
||||
'info_dict': {
|
||||
'id': 'Wjf9EDR3A_60',
|
||||
'id': '529953347624',
|
||||
'ext': 'mp4',
|
||||
'title': 'FULL EPISODE: Family Business',
|
||||
'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
|
||||
@@ -237,6 +236,9 @@ class NBCNewsIE(ThePlatformIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
|
||||
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
|
||||
'timestamp': 1423104900,
|
||||
'uploader': 'NBCU-NEWS',
|
||||
'upload_date': '20150205',
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -245,10 +247,12 @@ class NBCNewsIE(ThePlatformIE):
|
||||
'info_dict': {
|
||||
'id': '529953347624',
|
||||
'ext': 'mp4',
|
||||
'title': 'Volkswagen U.S. Chief: We \'Totally Screwed Up\'',
|
||||
'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
|
||||
'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up',
|
||||
'description': 'md5:c8be487b2d80ff0594c005add88d8351',
|
||||
'upload_date': '20150922',
|
||||
'timestamp': 1442917800,
|
||||
'uploader': 'NBCU-NEWS',
|
||||
},
|
||||
'expected_warnings': ['http-6000 is not available']
|
||||
},
|
||||
{
|
||||
'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
|
||||
@@ -260,6 +264,22 @@ class NBCNewsIE(ThePlatformIE):
|
||||
'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
|
||||
'upload_date': '20160420',
|
||||
'timestamp': 1461152093,
|
||||
'uploader': 'NBCU-NEWS',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
|
||||
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
|
||||
'info_dict': {
|
||||
'id': '314487875924',
|
||||
'ext': 'mp4',
|
||||
'title': 'The chaotic GOP immigration vote',
|
||||
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'timestamp': 1406937606,
|
||||
'upload_date': '20140802',
|
||||
'uploader': 'NBCU-NEWS',
|
||||
'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -290,105 +310,28 @@ class NBCNewsIE(ThePlatformIE):
|
||||
}
|
||||
else:
|
||||
# "feature" and "nightly-news" pages use theplatform.com
|
||||
display_id = mobj.group('display_id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
info = None
|
||||
bootstrap_json = self._search_regex(
|
||||
[r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
|
||||
r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'],
|
||||
webpage, 'bootstrap json', default=None)
|
||||
bootstrap = self._parse_json(
|
||||
bootstrap_json, display_id, transform_source=unescapeHTML)
|
||||
if 'results' in bootstrap:
|
||||
info = bootstrap['results'][0]['video']
|
||||
elif 'video' in bootstrap:
|
||||
info = bootstrap['video']
|
||||
else:
|
||||
info = bootstrap
|
||||
video_id = info['mpxId']
|
||||
title = info['title']
|
||||
|
||||
subtitles = {}
|
||||
caption_links = info.get('captionLinks')
|
||||
if caption_links:
|
||||
for (sub_key, sub_ext) in (('smpte-tt', 'ttml'), ('web-vtt', 'vtt'), ('srt', 'srt')):
|
||||
sub_url = caption_links.get(sub_key)
|
||||
if sub_url:
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': sub_url,
|
||||
'ext': sub_ext,
|
||||
})
|
||||
|
||||
formats = []
|
||||
for video_asset in info['videoAssets']:
|
||||
video_url = video_asset.get('publicUrl')
|
||||
if not video_url:
|
||||
continue
|
||||
container = video_asset.get('format')
|
||||
asset_type = video_asset.get('assetType') or ''
|
||||
if container == 'ISM' or asset_type == 'FireTV-Once':
|
||||
continue
|
||||
elif asset_type == 'OnceURL':
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
video_url, video_id)
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
video_id = mobj.group('mpx_id')
|
||||
if not video_id.isdigit():
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
info = None
|
||||
bootstrap_json = self._search_regex(
|
||||
[r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
|
||||
r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'],
|
||||
webpage, 'bootstrap json', default=None)
|
||||
bootstrap = self._parse_json(
|
||||
bootstrap_json, video_id, transform_source=unescapeHTML)
|
||||
if 'results' in bootstrap:
|
||||
info = bootstrap['results'][0]['video']
|
||||
elif 'video' in bootstrap:
|
||||
info = bootstrap['video']
|
||||
else:
|
||||
tbr = int_or_none(video_asset.get('bitRate') or video_asset.get('bitrate'), 1000)
|
||||
format_id = 'http%s' % ('-%d' % tbr if tbr else '')
|
||||
video_url = update_url_query(
|
||||
video_url, {'format': 'redirect'})
|
||||
# resolve the url so that we can check availability and detect the correct extension
|
||||
head = self._request_webpage(
|
||||
HEADRequest(video_url), video_id,
|
||||
'Checking %s url' % format_id,
|
||||
'%s is not available' % format_id,
|
||||
fatal=False)
|
||||
if head:
|
||||
video_url = head.geturl()
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': video_url,
|
||||
'width': int_or_none(video_asset.get('width')),
|
||||
'height': int_or_none(video_asset.get('height')),
|
||||
'tbr': tbr,
|
||||
'container': video_asset.get('format'),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
info = bootstrap
|
||||
video_id = info['mpxId']
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': info.get('description'),
|
||||
'thumbnail': info.get('thumbnail'),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
'timestamp': parse_iso8601(info.get('pubDate') or info.get('pub_date')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
# http://feed.theplatform.com/f/2E2eJC/nbcnews also works
|
||||
'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byId=%s' % video_id,
|
||||
'ie_key': 'ThePlatformFeed',
|
||||
}
|
||||
|
||||
|
||||
class MSNBCIE(InfoExtractor):
|
||||
# https URLs redirect to corresponding http ones
|
||||
_VALID_URL = r'https?://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
|
||||
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
|
||||
'info_dict': {
|
||||
'id': 'n_hayes_Aimm_140801_272214',
|
||||
'ext': 'mp4',
|
||||
'title': 'The chaotic GOP immigration vote',
|
||||
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'timestamp': 1406937606,
|
||||
'upload_date': '20140802',
|
||||
'uploader': 'NBCU-NEWS',
|
||||
'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
embed_url = self._html_search_meta('embedURL', webpage)
|
||||
return self.url_result(embed_url)
|
||||
|
||||
@@ -48,6 +48,6 @@ class TF1IE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
wat_id = self._html_search_regex(
|
||||
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8}).*?\1',
|
||||
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})\1',
|
||||
webpage, 'wat id', group='id')
|
||||
return self.url_result('wat:%s' % wat_id, 'Wat')
|
||||
|
||||
@@ -4,6 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
clean_html,
|
||||
get_element_by_attribute,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class TVPIE(InfoExtractor):
|
||||
@@ -21,7 +27,7 @@ class TVPIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
|
||||
'md5': 'c3b15ed1af288131115ff17a17c19dda',
|
||||
'md5': 'b0005b542e5b4de643a9690326ab1257',
|
||||
'info_dict': {
|
||||
'id': '17916176',
|
||||
'ext': 'mp4',
|
||||
@@ -53,6 +59,11 @@ class TVPIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id)
|
||||
|
||||
error_massage = get_element_by_attribute('class', 'msg error', webpage)
|
||||
if error_massage:
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, clean_html(error_massage)), expected=True)
|
||||
|
||||
title = self._search_regex(
|
||||
r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1',
|
||||
webpage, 'title', group='title')
|
||||
@@ -66,24 +77,50 @@ class TVPIE(InfoExtractor):
|
||||
r"poster\s*:\s*'([^']+)'", webpage, 'thumbnail', default=None)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'0:{src:([\'"])(?P<url>.*?)\1', webpage, 'formats', group='url', default=None)
|
||||
if not video_url:
|
||||
r'0:{src:([\'"])(?P<url>.*?)\1', webpage,
|
||||
'formats', group='url', default=None)
|
||||
if not video_url or 'material_niedostepny.mp4' in video_url:
|
||||
video_url = self._download_json(
|
||||
'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id,
|
||||
video_id)['video_url']
|
||||
|
||||
ext = video_url.rsplit('.', 1)[-1]
|
||||
if ext != 'ism/manifest':
|
||||
if '/' in ext:
|
||||
ext = 'mp4'
|
||||
formats = []
|
||||
video_url_base = self._search_regex(
|
||||
r'(https?://.+?/video)(?:\.(?:ism|f4m|m3u8)|-\d+\.mp4)',
|
||||
video_url, 'video base url', default=None)
|
||||
if video_url_base:
|
||||
# TODO: Current DASH formats are broken - $Time$ pattern in
|
||||
# <SegmentTemplate> not implemented yet
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# video_url_base + '.ism/video.mpd',
|
||||
# video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url_base + '.ism/video.f4m',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
video_url_base + '.ism/video.m3u8', video_id,
|
||||
'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
self._sort_formats(m3u8_formats)
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
m3u8_formats))
|
||||
formats.extend(m3u8_formats)
|
||||
for i, m3u8_format in enumerate(m3u8_formats, 2):
|
||||
http_url = '%s-%d.mp4' % (video_url_base, i)
|
||||
if self._is_valid_url(http_url, video_id):
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': http_url,
|
||||
'format_id': f['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
else:
|
||||
formats = [{
|
||||
'format_id': 'direct',
|
||||
'url': video_url,
|
||||
'ext': ext,
|
||||
'ext': determine_ext(video_url, 'mp4'),
|
||||
}]
|
||||
else:
|
||||
m3u8_url = re.sub('([^/]*)\.ism/manifest', r'\1.ism/\1.m3u8', video_url)
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
InAdvancePagedList,
|
||||
int_or_none,
|
||||
NO_DEFAULT,
|
||||
RegexNotFoundError,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
@@ -56,6 +57,26 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
self._set_vimeo_cookie('vuid', vuid)
|
||||
self._download_webpage(login_request, None, False, 'Wrong login info')
|
||||
|
||||
def _verify_video_password(self, url, video_id, webpage):
|
||||
password = self._downloader.params.get('videopassword')
|
||||
if password is None:
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
|
||||
token, vuid = self._extract_xsrft_and_vuid(webpage)
|
||||
data = urlencode_postdata({
|
||||
'password': password,
|
||||
'token': token,
|
||||
})
|
||||
if url.startswith('http://'):
|
||||
# vimeo only supports https now, but the user can give an http url
|
||||
url = url.replace('http://', 'https://')
|
||||
password_request = sanitized_Request(url + '/password', data)
|
||||
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
password_request.add_header('Referer', url)
|
||||
self._set_vimeo_cookie('vuid', vuid)
|
||||
return self._download_webpage(
|
||||
password_request, video_id,
|
||||
'Verifying the password', 'Wrong password')
|
||||
|
||||
def _extract_xsrft_and_vuid(self, webpage):
|
||||
xsrft = self._search_regex(
|
||||
r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
|
||||
@@ -344,26 +365,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
if mobj:
|
||||
return mobj.group(1)
|
||||
|
||||
def _verify_video_password(self, url, video_id, webpage):
|
||||
password = self._downloader.params.get('videopassword')
|
||||
if password is None:
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
|
||||
token, vuid = self._extract_xsrft_and_vuid(webpage)
|
||||
data = urlencode_postdata({
|
||||
'password': password,
|
||||
'token': token,
|
||||
})
|
||||
if url.startswith('http://'):
|
||||
# vimeo only supports https now, but the user can give an http url
|
||||
url = url.replace('http://', 'https://')
|
||||
password_request = sanitized_Request(url + '/password', data)
|
||||
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
password_request.add_header('Referer', url)
|
||||
self._set_vimeo_cookie('vuid', vuid)
|
||||
return self._download_webpage(
|
||||
password_request, video_id,
|
||||
'Verifying the password', 'Wrong password')
|
||||
|
||||
def _verify_player_video_password(self, url, video_id):
|
||||
password = self._downloader.params.get('videopassword')
|
||||
if password is None:
|
||||
@@ -791,12 +792,39 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader_id': 'user22258446',
|
||||
}
|
||||
}, {
|
||||
'note': 'Password protected',
|
||||
'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde',
|
||||
'info_dict': {
|
||||
'id': '138823582',
|
||||
'ext': 'mp4',
|
||||
'title': 'EFFICIENT PICKUP MASTERCLASS MODULE 1',
|
||||
'uploader': 'TMB',
|
||||
'uploader_id': 'user37284429',
|
||||
},
|
||||
'params': {
|
||||
'videopassword': 'holygrail',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _get_config_url(self, webpage_url, video_id, video_password_verified=False):
|
||||
webpage = self._download_webpage(webpage_url, video_id)
|
||||
config_url = self._html_search_regex(
|
||||
r'data-config-url="([^"]+)"', webpage, 'config URL',
|
||||
default=NO_DEFAULT if video_password_verified else None)
|
||||
if config_url is None:
|
||||
self._verify_video_password(webpage_url, video_id, webpage)
|
||||
config_url = self._get_config_url(
|
||||
webpage_url, video_id, video_password_verified=True)
|
||||
return config_url
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
config = self._download_json(
|
||||
'https://player.vimeo.com/video/%s/config' % video_id, video_id)
|
||||
config_url = self._get_config_url(url, video_id)
|
||||
config = self._download_json(config_url, video_id)
|
||||
info_dict = self._parse_config(config, video_id)
|
||||
self._vimeo_sort_formats(info_dict['formats'])
|
||||
info_dict['id'] = video_id
|
||||
|
||||
@@ -501,6 +501,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'youtube_include_dash_manifest': True,
|
||||
'format': '141',
|
||||
},
|
||||
'skip': 'format 141 not served anymore',
|
||||
},
|
||||
# DASH manifest with encrypted signature
|
||||
{
|
||||
@@ -517,7 +518,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'youtube_include_dash_manifest': True,
|
||||
'format': '141',
|
||||
'format': '141/bestaudio[ext=m4a]',
|
||||
},
|
||||
},
|
||||
# JS player signature function name containing $
|
||||
@@ -537,7 +538,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'youtube_include_dash_manifest': True,
|
||||
'format': '141',
|
||||
'format': '141/bestaudio[ext=m4a]',
|
||||
},
|
||||
},
|
||||
# Controversy video
|
||||
@@ -618,7 +619,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic',
|
||||
'license': 'Standard YouTube License',
|
||||
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
|
||||
'uploader': 'Olympics',
|
||||
'uploader': 'Olympic',
|
||||
'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
|
||||
},
|
||||
'params': {
|
||||
@@ -671,7 +672,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
|
||||
'uploader': 'dorappi2000',
|
||||
'license': 'Standard YouTube License',
|
||||
'formats': 'mincount:33',
|
||||
'formats': 'mincount:32',
|
||||
},
|
||||
},
|
||||
# DASH manifest with segment_list
|
||||
@@ -691,7 +692,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'params': {
|
||||
'youtube_include_dash_manifest': True,
|
||||
'format': '135', # bestvideo
|
||||
}
|
||||
},
|
||||
'skip': 'This live event has ended.',
|
||||
},
|
||||
{
|
||||
# Multifeed videos (multiple cameras), URL is for Main Camera
|
||||
@@ -762,6 +764,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'skip': 'Not multifeed anymore',
|
||||
},
|
||||
{
|
||||
'url': 'http://vid.plus/FlRa-iH7PGw',
|
||||
@@ -814,6 +817,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'This video does not exist.',
|
||||
},
|
||||
{
|
||||
# Video licensed under Creative Commons
|
||||
@@ -1331,7 +1335,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
(?:[a-zA-Z-]+="[^"]*"\s+)*?
|
||||
(?:title|href)="([^"]+)"\s+
|
||||
(?:[a-zA-Z-]+="[^"]*"\s+)*?
|
||||
class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*>
|
||||
class="[^"]*"[^>]*>
|
||||
[^<]+\.{3}\s*
|
||||
</a>
|
||||
''', r'\1', video_description)
|
||||
|
||||
@@ -232,7 +232,7 @@ class JSInterpreter(object):
|
||||
def extract_function(self, funcname):
|
||||
func_m = re.search(
|
||||
r'''(?x)
|
||||
(?:function\s+%s|[{;,]%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
|
||||
(?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
|
||||
\((?P<args>[^)]*)\)\s*
|
||||
\{(?P<code>[^}]+)\}''' % (
|
||||
re.escape(funcname), re.escape(funcname), re.escape(funcname)),
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.06.23'
|
||||
__version__ = '2016.06.25'
|
||||
|
||||
Reference in New Issue
Block a user