mirror of
https://gitlab.com/ytdl-org/youtube-dl.git
synced 2026-01-25 00:00:04 -05:00
Compare commits
28 Commits
2016.08.28
...
2016.08.31
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4fd350611c | ||
|
|
263fef43de | ||
|
|
a249ab83cb | ||
|
|
f7043ef39c | ||
|
|
64fc49aba0 | ||
|
|
245023a861 | ||
|
|
3c77a54d5d | ||
|
|
da30a20a4d | ||
|
|
1fe48afea5 | ||
|
|
42e05be867 | ||
|
|
fe45b0e060 | ||
|
|
a06e1498aa | ||
|
|
5a80e7b43a | ||
|
|
3fb2a23029 | ||
|
|
cd10b3ea63 | ||
|
|
547993dcd0 | ||
|
|
6c9b71bc08 | ||
|
|
93b8404599 | ||
|
|
9ba1e1dcc0 | ||
|
|
b8079a40bc | ||
|
|
5bc8a73af6 | ||
|
|
b3eaeded12 | ||
|
|
ec65b391cb | ||
|
|
2982514072 | ||
|
|
98908bcf7c | ||
|
|
04b32c8f96 | ||
|
|
40eec6b15c | ||
|
|
39efc6e3e0 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.28**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.31*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.31**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.08.28
|
||||
[debug] youtube-dl version 2016.08.31
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
22
ChangeLog
22
ChangeLog
@@ -1,3 +1,25 @@
|
||||
version 2016.08.31
|
||||
|
||||
Extractors
|
||||
* [soundcloud] Fix URL regular expression to avoid clashes with sets (#10505)
|
||||
* [bandcamp:album] Fix title extraction (#10455)
|
||||
* [pyvideo] Fix extraction (#10468)
|
||||
+ [ctv] Add support for tsn.ca, bnn.ca and thecomedynetwork.ca (#10016)
|
||||
* [9c9media] Extract more metadata
|
||||
* [9c9media] Fix multiple stacks extraction (#10016)
|
||||
* [adultswim] Improve video info extraction (#10492)
|
||||
* [vodplatform] Improve embed regular expression
|
||||
- [played] Remove extractor (#10470)
|
||||
+ [tbs] Add extractor for tbs.com and tntdrama.com (#10222)
|
||||
+ [cartoonnetwork] Add extractor for cartoonnetwork.com (#10110)
|
||||
* [adultswim] Rework in terms of turner extractor
|
||||
* [cnn] Rework in terms of turner extractor
|
||||
* [nba] Rework in terms of turner extractor
|
||||
+ [turner] Add base extractor for Turner Broadcasting System based sites
|
||||
* [bilibili] Fix extraction (#10375)
|
||||
* [openload] Fix extraction (#10408)
|
||||
|
||||
|
||||
version 2016.08.28
|
||||
|
||||
Core
|
||||
|
||||
@@ -13,6 +13,8 @@
|
||||
- **5min**
|
||||
- **8tracks**
|
||||
- **91porn**
|
||||
- **9c9media**
|
||||
- **9c9media:stack**
|
||||
- **9gag**
|
||||
- **9now.com.au**
|
||||
- **abc.net.au**
|
||||
@@ -89,7 +91,7 @@
|
||||
- **Bet**
|
||||
- **Bigflix**
|
||||
- **Bild**: Bild.de
|
||||
- **BiliBili** (Currently broken)
|
||||
- **BiliBili**
|
||||
- **BioBioChileTV**
|
||||
- **BIQLE**
|
||||
- **BleacherReport**
|
||||
@@ -115,6 +117,7 @@
|
||||
- **Canvas**
|
||||
- **CarambaTV**
|
||||
- **CarambaTVPage**
|
||||
- **CartoonNetwork**
|
||||
- **cbc.ca**
|
||||
- **cbc.ca:player**
|
||||
- **cbc.ca:watch**
|
||||
@@ -459,7 +462,6 @@
|
||||
- **nick.de**
|
||||
- **niconico**: ニコニコ動画
|
||||
- **NiconicoPlaylist**
|
||||
- **NineCNineMedia**
|
||||
- **Nintendo**
|
||||
- **njoy**: N-JOY
|
||||
- **njoy:embed**
|
||||
@@ -517,7 +519,6 @@
|
||||
- **Pinkbike**
|
||||
- **Pladform**
|
||||
- **play.fm**
|
||||
- **played.to**
|
||||
- **PlaysTV**
|
||||
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
||||
- **Playvid**
|
||||
@@ -675,6 +676,7 @@
|
||||
- **Tagesschau**
|
||||
- **tagesschau:player**
|
||||
- **Tass**
|
||||
- **TBS**
|
||||
- **TDSLifeway**
|
||||
- **teachertube**: teachertube.com videos
|
||||
- **teachertube:user:collection**: teachertube.com user and collection videos
|
||||
|
||||
@@ -223,7 +223,8 @@ class FFmpegFD(ExternalFD):
|
||||
|
||||
if proxy.startswith('socks'):
|
||||
self.report_warning(
|
||||
'%s does not support SOCKS proxies. Downloading may fail.' % self.get_basename())
|
||||
'%s does not support SOCKS proxies. Downloading is likely to fail. '
|
||||
'Consider adding --hls-prefer-native to your command.' % self.get_basename())
|
||||
|
||||
# Since December 2015 ffmpeg supports -http_proxy option (see
|
||||
# http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
|
||||
|
||||
@@ -83,7 +83,10 @@ class HlsFD(FragmentFD):
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
extra_query = None
|
||||
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
|
||||
if extra_param_to_segment_url:
|
||||
extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
|
||||
i = 0
|
||||
media_sequence = 0
|
||||
decrypt_info = {'METHOD': 'NONE'}
|
||||
@@ -97,8 +100,8 @@ class HlsFD(FragmentFD):
|
||||
if re.match(r'^https?://', line)
|
||||
else compat_urlparse.urljoin(man_url, line))
|
||||
frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
|
||||
if extra_param_to_segment_url:
|
||||
frag_url = update_url_query(frag_url, extra_param_to_segment_url)
|
||||
if extra_query:
|
||||
frag_url = update_url_query(frag_url, extra_query)
|
||||
success = ctx['dl'].download(frag_filename, {'url': frag_url})
|
||||
if not success:
|
||||
return False
|
||||
@@ -124,8 +127,8 @@ class HlsFD(FragmentFD):
|
||||
if not re.match(r'^https?://', decrypt_info['URI']):
|
||||
decrypt_info['URI'] = compat_urlparse.urljoin(
|
||||
man_url, decrypt_info['URI'])
|
||||
if extra_param_to_segment_url:
|
||||
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_param_to_segment_url)
|
||||
if extra_query:
|
||||
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
|
||||
decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
|
||||
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
|
||||
media_sequence = int(line[22:])
|
||||
|
||||
@@ -3,16 +3,14 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AdultSwimIE(InfoExtractor):
|
||||
class AdultSwimIE(TurnerBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -96,7 +94,8 @@ class AdultSwimIE(InfoExtractor):
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -148,7 +147,10 @@ class AdultSwimIE(InfoExtractor):
|
||||
if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
|
||||
video_info = bootstrapped_data['slugged_video']
|
||||
if not video_info:
|
||||
video_info = bootstrapped_data.get('heroMetadata', {}).get('trailer').get('video')
|
||||
video_info = bootstrapped_data.get(
|
||||
'heroMetadata', {}).get('trailer', {}).get('video')
|
||||
if not video_info:
|
||||
video_info = bootstrapped_data.get('onlineOriginals', [None])[0]
|
||||
if not video_info:
|
||||
raise ExtractorError('Unable to find video info')
|
||||
|
||||
@@ -171,62 +173,29 @@ class AdultSwimIE(InfoExtractor):
|
||||
|
||||
episode_id = video_info['id']
|
||||
episode_title = video_info['title']
|
||||
episode_description = video_info['description']
|
||||
episode_duration = video_info.get('duration')
|
||||
episode_description = video_info.get('description')
|
||||
episode_duration = int_or_none(video_info.get('duration'))
|
||||
view_count = int_or_none(video_info.get('views'))
|
||||
|
||||
entries = []
|
||||
for part_num, segment_id in enumerate(segment_ids):
|
||||
segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id
|
||||
|
||||
segement_info = self._extract_cvp_info(
|
||||
'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id,
|
||||
segment_id, {
|
||||
'secure': {
|
||||
'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big',
|
||||
'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do',
|
||||
},
|
||||
})
|
||||
segment_title = '%s - %s' % (show_title, episode_title)
|
||||
if len(segment_ids) > 1:
|
||||
segment_title += ' Part %d' % (part_num + 1)
|
||||
|
||||
idoc = self._download_xml(
|
||||
segment_url, segment_title,
|
||||
'Downloading segment information', 'Unable to download segment information')
|
||||
|
||||
segment_duration = float_or_none(
|
||||
xpath_text(idoc, './/trt', 'segment duration').strip())
|
||||
|
||||
formats = []
|
||||
file_els = idoc.findall('.//files/file') or idoc.findall('./files/file')
|
||||
|
||||
unique_urls = []
|
||||
unique_file_els = []
|
||||
for file_el in file_els:
|
||||
media_url = file_el.text
|
||||
if not media_url or determine_ext(media_url) == 'f4m':
|
||||
continue
|
||||
if file_el.text not in unique_urls:
|
||||
unique_urls.append(file_el.text)
|
||||
unique_file_els.append(file_el)
|
||||
|
||||
for file_el in unique_file_els:
|
||||
bitrate = file_el.attrib.get('bitrate')
|
||||
ftype = file_el.attrib.get('type')
|
||||
media_url = file_el.text
|
||||
if determine_ext(media_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, segment_title, 'mp4', preference=0,
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': '%s_%s' % (bitrate, ftype),
|
||||
'url': file_el.text.strip(),
|
||||
# The bitrate may not be a number (for example: 'iphone')
|
||||
'tbr': int(bitrate) if bitrate.isdigit() else None,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
segement_info.update({
|
||||
'id': segment_id,
|
||||
'title': segment_title,
|
||||
'formats': formats,
|
||||
'duration': segment_duration,
|
||||
'description': episode_description
|
||||
'description': episode_description,
|
||||
})
|
||||
entries.append(segement_info)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
@@ -235,5 +204,6 @@ class AdultSwimIE(InfoExtractor):
|
||||
'entries': entries,
|
||||
'title': '%s - %s' % (show_title, episode_title),
|
||||
'description': episode_description,
|
||||
'duration': episode_duration
|
||||
'duration': episode_duration,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
||||
@@ -162,6 +162,15 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
'uploader_id': 'dotscale',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}, {
|
||||
# with escaped quote in title
|
||||
'url': 'https://jstrecords.bandcamp.com/album/entropy-ep',
|
||||
'info_dict': {
|
||||
'title': '"Entropy" EP',
|
||||
'uploader_id': 'jstrecords',
|
||||
'id': 'entropy-ep',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -176,8 +185,11 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
entries = [
|
||||
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
|
||||
for t_path in tracks_paths]
|
||||
title = self._search_regex(
|
||||
r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
|
||||
title = self._html_search_regex(
|
||||
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
|
||||
webpage, 'title', fatal=False)
|
||||
if title:
|
||||
title = title.replace(r'\"', '"')
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'uploader_id': uploader_id,
|
||||
|
||||
@@ -1,35 +1,26 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import datetime
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_str,
|
||||
compat_parse_qs,
|
||||
compat_xml_parse_error,
|
||||
)
|
||||
from ..compat import compat_parse_qs
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
xpath_text,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class BiliBiliIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
|
||||
_VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
|
||||
'info_dict': {
|
||||
'id': '1554319',
|
||||
'id': '1074402',
|
||||
'ext': 'mp4',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
||||
@@ -43,24 +34,28 @@ class BiliBiliIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.bilibili.com/video/av1041170/',
|
||||
'info_dict': {
|
||||
'id': '1507019',
|
||||
'id': '1041170',
|
||||
'ext': 'mp4',
|
||||
'title': '【BD1080P】刀语【诸神&异域】',
|
||||
'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
|
||||
'duration': 3382.259,
|
||||
'timestamp': 1396530060,
|
||||
'upload_date': '20140403',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
'uploader': '枫叶逝去',
|
||||
'uploader_id': '520116',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bilibili.com/video/av4808130/',
|
||||
'info_dict': {
|
||||
'id': '7802182',
|
||||
'id': '4808130',
|
||||
'ext': 'mp4',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'duration': 1493.995,
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
@@ -68,12 +63,14 @@ class BiliBiliIE(InfoExtractor):
|
||||
# Missing upload time
|
||||
'url': 'http://www.bilibili.com/video/av1867637/',
|
||||
'info_dict': {
|
||||
'id': '2880301',
|
||||
'id': '1867637',
|
||||
'ext': 'mp4',
|
||||
'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】',
|
||||
'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】',
|
||||
'duration': 5760.0,
|
||||
'uploader': '黑夜为猫',
|
||||
'uploader_id': '610729',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
},
|
||||
'params': {
|
||||
# Just to test metadata extraction
|
||||
@@ -82,86 +79,61 @@ class BiliBiliIE(InfoExtractor):
|
||||
'expected_warnings': ['upload time'],
|
||||
}]
|
||||
|
||||
# BiliBili blocks keys from time to time. The current key is extracted from
|
||||
# the Android client
|
||||
# TODO: find the sign algorithm used in the flash player
|
||||
_APP_KEY = '86385cdc024c0f6c'
|
||||
_APP_KEY = '6f90a59ac58a4123'
|
||||
_BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
params = compat_parse_qs(self._search_regex(
|
||||
cid = compat_parse_qs(self._search_regex(
|
||||
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
||||
webpage, 'player parameters'))
|
||||
cid = params['cid'][0]
|
||||
webpage, 'player parameters'))['cid'][0]
|
||||
|
||||
info_xml_str = self._download_webpage(
|
||||
'http://interface.bilibili.com/v_cdn_play',
|
||||
cid, query={'appkey': self._APP_KEY, 'cid': cid},
|
||||
note='Downloading video info page')
|
||||
payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
|
||||
sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
|
||||
|
||||
err_msg = None
|
||||
durls = None
|
||||
info_xml = None
|
||||
try:
|
||||
info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8'))
|
||||
except compat_xml_parse_error:
|
||||
info_json = self._parse_json(info_xml_str, video_id, fatal=False)
|
||||
err_msg = (info_json or {}).get('error_text')
|
||||
else:
|
||||
err_msg = xpath_text(info_xml, './message')
|
||||
|
||||
if info_xml is not None:
|
||||
durls = info_xml.findall('./durl')
|
||||
if not durls:
|
||||
if err_msg:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True)
|
||||
else:
|
||||
raise ExtractorError('No videos found!')
|
||||
video_info = self._download_json(
|
||||
'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
|
||||
video_id, note='Downloading video info page')
|
||||
|
||||
entries = []
|
||||
|
||||
for durl in durls:
|
||||
size = xpath_text(durl, ['./filesize', './size'])
|
||||
for idx, durl in enumerate(video_info['durl']):
|
||||
formats = [{
|
||||
'url': durl.find('./url').text,
|
||||
'filesize': int_or_none(size),
|
||||
'url': durl['url'],
|
||||
'filesize': int_or_none(durl['size']),
|
||||
}]
|
||||
for backup_url in durl.findall('./backup_url/url'):
|
||||
for backup_url in durl['backup_url']:
|
||||
formats.append({
|
||||
'url': backup_url.text,
|
||||
'url': backup_url,
|
||||
# backup URLs have lower priorities
|
||||
'preference': -2 if 'hd.mp4' in backup_url.text else -3,
|
||||
'preference': -2 if 'hd.mp4' in backup_url else -3,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': '%s_part%s' % (cid, xpath_text(durl, './order')),
|
||||
'duration': int_or_none(xpath_text(durl, './length'), 1000),
|
||||
'id': '%s_part%s' % (video_id, idx),
|
||||
'duration': float_or_none(durl.get('length'), 1000),
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
datetime_str = self._html_search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)
|
||||
timestamp = None
|
||||
if datetime_str:
|
||||
timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple())
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False))
|
||||
|
||||
# TODO 'view_count' requires deobfuscating Javascript
|
||||
info = {
|
||||
'id': compat_str(cid),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': self._html_search_meta('thumbnailUrl', webpage),
|
||||
'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000),
|
||||
'duration': float_or_none(video_info.get('timelength'), scale=1000),
|
||||
}
|
||||
|
||||
uploader_mobj = re.search(
|
||||
|
||||
36
youtube_dl/extractor/cartoonnetwork.py
Normal file
36
youtube_dl/extractor/cartoonnetwork.py
Normal file
@@ -0,0 +1,36 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .turner import TurnerBaseIE
|
||||
|
||||
|
||||
class CartoonNetworkIE(TurnerBaseIE):
    """Extractor for cartoonnetwork.com clip and episode pages.

    The page embeds a ``_cnglobal.cvpVideoId`` or ``_cnglobal.cvpTitleId``
    JavaScript assignment; that identifier is forwarded to the CVP playlist
    service and the result is parsed by ``_extract_cvp_info`` from the base
    class.
    """

    _VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
    _TEST = {
        'url': 'http://www.cartoonnetwork.com/video/teen-titans-go/starfire-the-cat-lady-clip.html',
        'info_dict': {
            'id': '8a250ab04ed07e6c014ef3f1e2f9016c',
            'ext': 'mp4',
            'title': 'Starfire the Cat Lady',
            'description': 'Robin decides to become a cat so that Starfire will finally love him.',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*.

        The slug from the URL is only used as a display id while fetching the
        page; the real video id comes from the page's ``_cnglobal`` script
        variables.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Look the identifier up through the extractor's regex helper instead
        # of calling ``re.search(...).groups()`` directly: the latter raises a
        # bare AttributeError on ``None`` whenever the page lacks the
        # assignment.
        # NOTE(review): presumably _search_regex reports a missing match as a
        # regular extraction failure (fatal by default) - confirm.
        id_type = self._search_regex(
            r"_cnglobal\.cvp(Video|Title)Id\s*=\s*'[^']+';",
            webpage, 'video id type')
        # Re-run the search restricted to the type found above; the first hit
        # is necessarily the same assignment the combined pattern matched.
        video_id = self._search_regex(
            r"_cnglobal\.cvp%sId\s*=\s*'([^']+)';" % id_type,
            webpage, 'video id')

        # The playlist service takes ``id`` for video ids and ``titleId`` for
        # title ids.
        query = ('id' if id_type == 'Video' else 'titleId') + '=' + video_id
        return self._extract_cvp_info(
            'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, {
                'secure': {
                    'media_src': 'http://apple-secure.cdn.turner.com/toon/big',
                    'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do',
                },
            })
|
||||
@@ -3,14 +3,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
url_basename,
|
||||
)
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import url_basename
|
||||
|
||||
|
||||
class CNNIE(InfoExtractor):
|
||||
class CNNIE(TurnerBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/
|
||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
|
||||
|
||||
@@ -25,6 +22,7 @@ class CNNIE(InfoExtractor):
|
||||
'duration': 135,
|
||||
'upload_date': '20130609',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',
|
||||
'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
|
||||
@@ -34,7 +32,8 @@ class CNNIE(InfoExtractor):
|
||||
'title': "Student's epic speech stuns new freshmen",
|
||||
'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
|
||||
'upload_date': '20130821',
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
|
||||
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
|
||||
@@ -44,7 +43,8 @@ class CNNIE(InfoExtractor):
|
||||
'title': 'Nashville Ep. 1: Hand crafted skateboards',
|
||||
'description': 'md5:e7223a503315c9f150acac52e76de086',
|
||||
'upload_date': '20141222',
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html',
|
||||
'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
|
||||
@@ -54,7 +54,11 @@ class CNNIE(InfoExtractor):
|
||||
'title': '5 stunning stats about Netflix',
|
||||
'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.',
|
||||
'upload_date': '20160819',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
|
||||
'only_matching': True,
|
||||
@@ -79,72 +83,21 @@ class CNNIE(InfoExtractor):
|
||||
},
|
||||
}
|
||||
|
||||
def _extract_timestamp(self, video_data):
|
||||
# TODO: fix timestamp extraction
|
||||
return None
|
||||
|
||||
def _real_extract(self, url):
|
||||
sub_domain, path, page_title = re.match(self._VALID_URL, url).groups()
|
||||
if sub_domain not in ('money', 'edition'):
|
||||
sub_domain = 'edition'
|
||||
config = self._CONFIG[sub_domain]
|
||||
info_url = config['data_src'] % path
|
||||
info = self._download_xml(info_url, page_title)
|
||||
|
||||
formats = []
|
||||
rex = re.compile(r'''(?x)
|
||||
(?P<width>[0-9]+)x(?P<height>[0-9]+)
|
||||
(?:_(?P<bitrate>[0-9]+)k)?
|
||||
''')
|
||||
for f in info.findall('files/file'):
|
||||
video_url = config['media_src'] + f.text.strip()
|
||||
fdct = {
|
||||
'format_id': f.attrib['bitrate'],
|
||||
'url': video_url,
|
||||
}
|
||||
|
||||
mf = rex.match(f.attrib['bitrate'])
|
||||
if mf:
|
||||
fdct['width'] = int(mf.group('width'))
|
||||
fdct['height'] = int(mf.group('height'))
|
||||
fdct['tbr'] = int_or_none(mf.group('bitrate'))
|
||||
else:
|
||||
mf = rex.search(f.text)
|
||||
if mf:
|
||||
fdct['width'] = int(mf.group('width'))
|
||||
fdct['height'] = int(mf.group('height'))
|
||||
fdct['tbr'] = int_or_none(mf.group('bitrate'))
|
||||
else:
|
||||
mi = re.match(r'ios_(audio|[0-9]+)$', f.attrib['bitrate'])
|
||||
if mi:
|
||||
if mi.group(1) == 'audio':
|
||||
fdct['vcodec'] = 'none'
|
||||
fdct['ext'] = 'm4a'
|
||||
else:
|
||||
fdct['tbr'] = int(mi.group(1))
|
||||
|
||||
formats.append(fdct)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = [{
|
||||
'height': int(t.attrib['height']),
|
||||
'width': int(t.attrib['width']),
|
||||
'url': t.text,
|
||||
} for t in info.findall('images/image')]
|
||||
|
||||
metas_el = info.find('metas')
|
||||
upload_date = (
|
||||
metas_el.attrib.get('version') if metas_el is not None else None)
|
||||
|
||||
duration_el = info.find('length')
|
||||
duration = parse_duration(duration_el.text)
|
||||
|
||||
return {
|
||||
'id': info.attrib['id'],
|
||||
'title': info.find('headline').text,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'description': info.find('description').text,
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
return self._extract_cvp_info(
|
||||
config['data_src'] % path, page_title, {
|
||||
'default': {
|
||||
'media_src': config['media_src'],
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
class CNNBlogsIE(InfoExtractor):
|
||||
@@ -159,6 +112,7 @@ class CNNBlogsIE(InfoExtractor):
|
||||
'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
|
||||
'upload_date': '20140209',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'add_ie': ['CNN'],
|
||||
}
|
||||
|
||||
@@ -181,9 +135,10 @@ class CNNArticleIE(InfoExtractor):
|
||||
'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
|
||||
'ext': 'mp4',
|
||||
'title': 'Obama: Cyberattack not an act of war',
|
||||
'description': 'md5:51ce6750450603795cad0cdfbd7d05c5',
|
||||
'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
|
||||
'upload_date': '20141221',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'add_ie': ['CNN'],
|
||||
}
|
||||
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ctv\.ca/video/player\?vid=(?P<id>[0-9.]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>ctv|tsn|bnn|thecomedynetwork)\.ca/.*?(?:\bvid=|-vid|~|%7E)(?P<id>[0-9.]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ctv.ca/video/player?vid=706966',
|
||||
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
|
||||
@@ -18,13 +20,27 @@ class CTVIE(InfoExtractor):
|
||||
'timestamp': 1442624700,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 404'],
|
||||
}, {
|
||||
'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
domain, video_id = re.match(self._VALID_URL, url).groups()
|
||||
if domain == 'thecomedynetwork':
|
||||
domain = 'comedy'
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': '9c9media:ctv_web:%s' % video_id,
|
||||
'url': '9c9media:%s_web:%s' % (domain, video_id),
|
||||
'ie_key': 'NineCNineMedia',
|
||||
}
|
||||
|
||||
@@ -127,6 +127,7 @@ from .carambatv import (
|
||||
CarambaTVIE,
|
||||
CarambaTVPageIE,
|
||||
)
|
||||
from .cartoonnetwork import CartoonNetworkIE
|
||||
from .cbc import (
|
||||
CBCIE,
|
||||
CBCPlayerIE,
|
||||
@@ -553,7 +554,10 @@ from .nick import (
|
||||
NickDeIE,
|
||||
)
|
||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||
from .ninecninemedia import NineCNineMediaIE
|
||||
from .ninecninemedia import (
|
||||
NineCNineMediaStackIE,
|
||||
NineCNineMediaIE,
|
||||
)
|
||||
from .ninegag import NineGagIE
|
||||
from .ninenow import NineNowIE
|
||||
from .nintendo import NintendoIE
|
||||
@@ -635,7 +639,6 @@ from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .pinkbike import PinkbikeIE
|
||||
from .pladform import PladformIE
|
||||
from .played import PlayedIE
|
||||
from .playfm import PlayFMIE
|
||||
from .plays import PlaysTVIE
|
||||
from .playtvak import PlaytvakIE
|
||||
@@ -823,6 +826,7 @@ from .tagesschau import (
|
||||
TagesschauIE,
|
||||
)
|
||||
from .tass import TassIE
|
||||
from .tbs import TBSIE
|
||||
from .tdslifeway import TDSLifewayIE
|
||||
from .teachertube import (
|
||||
TeacherTubeIE,
|
||||
|
||||
@@ -104,7 +104,8 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'expected_warnings': [
|
||||
'URL could be a direct video link, returning it as such.'
|
||||
]
|
||||
],
|
||||
'skip': 'URL invalid',
|
||||
},
|
||||
# Direct download with broken HEAD
|
||||
{
|
||||
@@ -268,7 +269,8 @@ class GenericIE(InfoExtractor):
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'video gone',
|
||||
},
|
||||
# m3u8 served with Content-Type: text/plain
|
||||
{
|
||||
@@ -283,7 +285,8 @@ class GenericIE(InfoExtractor):
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'video gone',
|
||||
},
|
||||
# google redirect
|
||||
{
|
||||
@@ -368,6 +371,7 @@ class GenericIE(InfoExtractor):
|
||||
'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
|
||||
},
|
||||
'add_ie': ['BrightcoveLegacy'],
|
||||
'skip': 'video gone',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.championat.com/video/football/v/87/87499.html',
|
||||
@@ -421,6 +425,7 @@ class GenericIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'movie expired',
|
||||
},
|
||||
# embed.ly video
|
||||
{
|
||||
@@ -448,6 +453,8 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
|
||||
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
|
||||
},
|
||||
# HEAD requests lead to endless 301, while GET is OK
|
||||
'expected_warnings': ['301'],
|
||||
},
|
||||
# RUTV embed
|
||||
{
|
||||
@@ -522,6 +529,9 @@ class GenericIE(InfoExtractor):
|
||||
'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
# This forum does not allow <iframe> syntaxes anymore
|
||||
# Now HTML tags are displayed as-is
|
||||
'skip': 'No videos on this page',
|
||||
},
|
||||
# Embedded TED video
|
||||
{
|
||||
@@ -570,7 +580,8 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'Requires rtmpdump'
|
||||
}
|
||||
},
|
||||
'skip': 'video gone',
|
||||
},
|
||||
# francetv embed
|
||||
{
|
||||
@@ -2232,11 +2243,11 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for VODPlatform embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vod-platform\.net/embed/[^/?#]+)',
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(
|
||||
self._proto_relative_url(unescapeHTML(mobj.group(1))), 'VODPlatform')
|
||||
self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
|
||||
|
||||
# Look for Instagram embeds
|
||||
instagram_embed_url = InstagramIE._extract_embed_url(webpage)
|
||||
|
||||
@@ -18,31 +18,20 @@ from ..utils import (
|
||||
class KUSIIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold',
|
||||
'md5': 'f926e7684294cf8cb7bdf8858e1b3988',
|
||||
'url': 'http://www.kusi.com/story/32849881/turko-files-refused-to-help-it-aint-right',
|
||||
'md5': '4e76ce8e53660ce9697d06c0ba6fc47d',
|
||||
'info_dict': {
|
||||
'id': '12203019',
|
||||
'id': '12689020',
|
||||
'ext': 'mp4',
|
||||
'title': 'Turko Files: Case Closed! & Put On Hold!',
|
||||
'duration': 231.0,
|
||||
'upload_date': '20160210',
|
||||
'timestamp': 1455087571,
|
||||
'title': "Turko Files: Refused to Help, It Ain't Right!",
|
||||
'duration': 223.586,
|
||||
'upload_date': '20160826',
|
||||
'timestamp': 1472233118,
|
||||
'thumbnail': 're:^https?://.*\.jpg$'
|
||||
},
|
||||
}, {
|
||||
'url': 'http://kusi.com/video?clipId=12203019',
|
||||
'info_dict': {
|
||||
'id': '12203019',
|
||||
'ext': 'mp4',
|
||||
'title': 'Turko Files: Case Closed! & Put On Hold!',
|
||||
'duration': 231.0,
|
||||
'upload_date': '20160210',
|
||||
'timestamp': 1455087571,
|
||||
'thumbnail': 're:^https?://.*\.jpg$'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Same as previous one
|
||||
},
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -1,25 +1,20 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
import os.path
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .turner import TurnerBaseIE
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
OnDemandPagedList,
|
||||
parse_duration,
|
||||
remove_start,
|
||||
xpath_text,
|
||||
xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
class NBAIE(InfoExtractor):
|
||||
class NBAIE(TurnerBaseIE):
|
||||
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)+(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
|
||||
@@ -44,28 +39,30 @@ class NBAIE(InfoExtractor):
|
||||
'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
|
||||
'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
|
||||
'info_dict': {
|
||||
'id': '0041400301-cle-atl-recap',
|
||||
'id': 'channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hawks vs. Cavaliers Game 1',
|
||||
'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
|
||||
'duration': 228,
|
||||
'timestamp': 1432134543,
|
||||
'upload_date': '20150520',
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.nba.com/clippers/news/doc-rivers-were-not-trading-blake',
|
||||
'info_dict': {
|
||||
'id': '1455672027478-Doc_Feb16_720',
|
||||
'id': 'teams/clippers/2016/02/17/1455672027478-Doc_Feb16_720.mov-297324',
|
||||
'ext': 'mp4',
|
||||
'title': 'Practice: Doc Rivers - 2/16/16',
|
||||
'description': 'Head Coach Doc Rivers addresses the media following practice.',
|
||||
'upload_date': '20160217',
|
||||
'upload_date': '20160216',
|
||||
'timestamp': 1455672000,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
|
||||
'info_dict': {
|
||||
@@ -80,7 +77,7 @@ class NBAIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
|
||||
'info_dict': {
|
||||
'id': 'Wigginsmp4',
|
||||
'id': 'teams/timberwolves/2014/12/12/Wigginsmp4-3462601',
|
||||
'ext': 'mp4',
|
||||
'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
|
||||
'description': 'Wolves rookie Andrew Wiggins addresses the media after Friday\'s shootaround.',
|
||||
@@ -92,6 +89,7 @@ class NBAIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}]
|
||||
|
||||
_PAGE_SIZE = 30
|
||||
@@ -145,53 +143,12 @@ class NBAIE(InfoExtractor):
|
||||
if path.startswith('video/teams'):
|
||||
path = 'video/channels/proxy/' + path[6:]
|
||||
|
||||
video_info = self._download_xml('http://www.nba.com/%s.xml' % path, video_id)
|
||||
video_id = os.path.splitext(xpath_text(video_info, 'slug'))[0]
|
||||
title = xpath_text(video_info, 'headline')
|
||||
description = xpath_text(video_info, 'description')
|
||||
duration = parse_duration(xpath_text(video_info, 'length'))
|
||||
timestamp = int_or_none(xpath_attr(video_info, 'dateCreated', 'uts'))
|
||||
|
||||
thumbnails = []
|
||||
for image in video_info.find('images'):
|
||||
thumbnails.append({
|
||||
'id': image.attrib.get('cut'),
|
||||
'url': image.text,
|
||||
'width': int_or_none(image.attrib.get('width')),
|
||||
'height': int_or_none(image.attrib.get('height')),
|
||||
return self._extract_cvp_info(
|
||||
'http://www.nba.com/%s.xml' % path, video_id, {
|
||||
'default': {
|
||||
'media_src': 'http://nba.cdn.turner.com/nba/big',
|
||||
},
|
||||
'm3u8': {
|
||||
'media_src': 'http://nbavod-f.akamaihd.net',
|
||||
},
|
||||
})
|
||||
|
||||
formats = []
|
||||
for video_file in video_info.findall('.//file'):
|
||||
video_url = video_file.text
|
||||
if video_url.startswith('/'):
|
||||
continue
|
||||
if video_url.endswith('.m3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
|
||||
elif video_url.endswith('.f4m'):
|
||||
formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
key = video_file.attrib.get('bitrate')
|
||||
format_info = {
|
||||
'format_id': key,
|
||||
'url': video_url,
|
||||
}
|
||||
mobj = re.search(r'(\d+)x(\d+)(?:_(\d+))?', key)
|
||||
if mobj:
|
||||
format_info.update({
|
||||
'width': int(mobj.group(1)),
|
||||
'height': int(mobj.group(2)),
|
||||
'tbr': int_or_none(mobj.group(3)),
|
||||
})
|
||||
formats.append(format_info)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -4,40 +4,36 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
parse_duration,
|
||||
ExtractorError
|
||||
float_or_none,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NineCNineMediaIE(InfoExtractor):
|
||||
_VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'
|
||||
class NineCNineMediaBaseIE(InfoExtractor):
|
||||
_API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/'
|
||||
|
||||
|
||||
class NineCNineMediaStackIE(NineCNineMediaBaseIE):
|
||||
IE_NAME = '9c9media:stack'
|
||||
_VALID_URL = r'9c9media:stack:(?P<destination_code>[^:]+):(?P<content_id>\d+):(?P<content_package>\d+):(?P<id>\d+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
destination_code, video_id = re.match(self._VALID_URL, url).groups()
|
||||
api_base_url = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/' % (destination_code, video_id)
|
||||
content = self._download_json(api_base_url, video_id, query={
|
||||
'$include': '[contentpackages]',
|
||||
})
|
||||
title = content['Name']
|
||||
if len(content['ContentPackages']) > 1:
|
||||
raise ExtractorError('multiple content packages')
|
||||
content_package = content['ContentPackages'][0]
|
||||
stacks_base_url = api_base_url + 'contentpackages/%s/stacks/' % content_package['Id']
|
||||
stacks = self._download_json(stacks_base_url, video_id)['Items']
|
||||
if len(stacks) > 1:
|
||||
raise ExtractorError('multiple stacks')
|
||||
stack = stacks[0]
|
||||
stack_base_url = '%s%s/manifest.' % (stacks_base_url, stack['Id'])
|
||||
destination_code, content_id, package_id, stack_id = re.match(self._VALID_URL, url).groups()
|
||||
stack_base_url_template = self._API_BASE_TEMPLATE + 'contentpackages/%s/stacks/%s/manifest.'
|
||||
stack_base_url = stack_base_url_template % (destination_code, content_id, package_id, stack_id)
|
||||
|
||||
formats = []
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stack_base_url + 'm3u8', video_id, 'mp4',
|
||||
stack_base_url + 'm3u8', stack_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stack_base_url + 'f4m', video_id,
|
||||
stack_base_url + 'f4m', stack_id,
|
||||
f4m_id='hds', fatal=False))
|
||||
mp4_url = self._download_webpage(stack_base_url + 'pd', video_id, fatal=False)
|
||||
mp4_url = self._download_webpage(stack_base_url + 'pd', stack_id, fatal=False)
|
||||
if mp4_url:
|
||||
formats.append({
|
||||
'url': mp4_url,
|
||||
@@ -46,10 +42,86 @@ class NineCNineMediaIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': content.get('Desc') or content.get('ShortDesc'),
|
||||
'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
|
||||
'duration': parse_duration(content.get('BroadcastTime')),
|
||||
'id': stack_id,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class NineCNineMediaIE(NineCNineMediaBaseIE):
|
||||
IE_NAME = '9c9media'
|
||||
_VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
destination_code, content_id = re.match(self._VALID_URL, url).groups()
|
||||
api_base_url = self._API_BASE_TEMPLATE % (destination_code, content_id)
|
||||
content = self._download_json(api_base_url, content_id, query={
|
||||
'$include': '[Media,Season,ContentPackages]',
|
||||
})
|
||||
title = content['Name']
|
||||
if len(content['ContentPackages']) > 1:
|
||||
raise ExtractorError('multiple content packages')
|
||||
content_package = content['ContentPackages'][0]
|
||||
package_id = content_package['Id']
|
||||
content_package_url = api_base_url + 'contentpackages/%s/' % package_id
|
||||
content_package = self._download_json(content_package_url, content_id)
|
||||
|
||||
if content_package.get('Constraints', {}).get('Security', {}).get('Type') == 'adobe-drm':
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
stacks = self._download_json(content_package_url + 'stacks/', package_id)['Items']
|
||||
multistacks = len(stacks) > 1
|
||||
|
||||
thumbnails = []
|
||||
for image in content.get('Images', []):
|
||||
image_url = image.get('Url')
|
||||
if not image_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': image_url,
|
||||
'width': int_or_none(image.get('Width')),
|
||||
'height': int_or_none(image.get('Height')),
|
||||
})
|
||||
|
||||
tags, categories = [], []
|
||||
for source_name, container in (('Tags', tags), ('Genres', categories)):
|
||||
for e in content.get(source_name, []):
|
||||
e_name = e.get('Name')
|
||||
if not e_name:
|
||||
continue
|
||||
container.append(e_name)
|
||||
|
||||
description = content.get('Desc') or content.get('ShortDesc')
|
||||
season = content.get('Season', {})
|
||||
base_info = {
|
||||
'description': description,
|
||||
'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
|
||||
'episode_number': int_or_none(content.get('Episode')),
|
||||
'season': season.get('Name'),
|
||||
'season_number': season.get('Number'),
|
||||
'season_id': season.get('Id'),
|
||||
'series': content.get('Media', {}).get('Name'),
|
||||
'tags': tags,
|
||||
'categories': categories,
|
||||
}
|
||||
|
||||
entries = []
|
||||
for stack in stacks:
|
||||
stack_id = compat_str(stack['Id'])
|
||||
entry = {
|
||||
'_type': 'url_transparent',
|
||||
'url': '9c9media:stack:%s:%s:%s:%s' % (destination_code, content_id, package_id, stack_id),
|
||||
'id': stack_id,
|
||||
'title': '%s_part%s' % (title, stack['Name']) if multistacks else title,
|
||||
'duration': float_or_none(stack.get('Duration')),
|
||||
'ie_key': 'NineCNineMediaStack',
|
||||
}
|
||||
entry.update(base_info)
|
||||
entries.append(entry)
|
||||
|
||||
return {
|
||||
'_type': 'multi_video',
|
||||
'id': content_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
@@ -55,10 +55,12 @@ class OpenloadIE(InfoExtractor):
|
||||
|
||||
video_url_chars = []
|
||||
|
||||
for c in enc_data:
|
||||
for idx, c in enumerate(enc_data):
|
||||
j = compat_ord(c)
|
||||
if j >= 33 and j <= 126:
|
||||
j = ((j + 14) % 94) + 33
|
||||
if idx == len(enc_data) - 1:
|
||||
j += 1
|
||||
video_url_chars += compat_chr(j)
|
||||
|
||||
video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars)
|
||||
|
||||
@@ -1,60 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import os.path
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class PlayedIE(InfoExtractor):
|
||||
IE_NAME = 'played.to'
|
||||
_VALID_URL = r'https?://(?:www\.)?played\.to/(?P<id>[a-zA-Z0-9_-]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://played.to/j2f2sfiiukgt',
|
||||
'md5': 'c2bd75a368e82980e7257bf500c00637',
|
||||
'info_dict': {
|
||||
'id': 'j2f2sfiiukgt',
|
||||
'ext': 'flv',
|
||||
'title': 'youtube-dl_test_video.mp4',
|
||||
},
|
||||
'skip': 'Removed for copyright infringement.', # oh wow
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
orig_webpage = self._download_webpage(url, video_id)
|
||||
|
||||
m_error = re.search(
|
||||
r'(?s)Reason for deletion:.*?<b class="err"[^>]*>(?P<msg>[^<]+)</b>', orig_webpage)
|
||||
if m_error:
|
||||
raise ExtractorError(m_error.group('msg'), expected=True)
|
||||
|
||||
data = self._hidden_inputs(orig_webpage)
|
||||
|
||||
self._sleep(2, video_id)
|
||||
|
||||
post = urlencode_postdata(data)
|
||||
headers = {
|
||||
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||
}
|
||||
req = sanitized_Request(url, post, headers)
|
||||
webpage = self._download_webpage(
|
||||
req, video_id, note='Downloading video page ...')
|
||||
|
||||
title = os.path.splitext(data['fname'])[0]
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'file: "?(.+?)",', webpage, 'video URL')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
}
|
||||
@@ -1,59 +1,72 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import os
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class PyvideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
|
||||
_VALID_URL = r'https?://(?:www\.)?pyvideo\.org/(?P<category>[^/]+)/(?P<id>[^/?#&.]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
|
||||
'md5': '520915673e53a5c5d487c36e0c4d85b5',
|
||||
'info_dict': {
|
||||
'id': '24_4WWkSmNo',
|
||||
'ext': 'webm',
|
||||
'title': 'Become a logging expert in 30 minutes',
|
||||
'description': 'md5:9665350d466c67fb5b1598de379021f7',
|
||||
'upload_date': '20130320',
|
||||
'uploader': 'Next Day Video',
|
||||
'uploader_id': 'NextDayVideo',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
_TESTS = [{
|
||||
'url': 'http://pyvideo.org/pycon-us-2013/become-a-logging-expert-in-30-minutes.html',
|
||||
'info_dict': {
|
||||
'id': 'become-a-logging-expert-in-30-minutes',
|
||||
},
|
||||
{
|
||||
'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
|
||||
'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
|
||||
'info_dict': {
|
||||
'id': '2542',
|
||||
'ext': 'm4v',
|
||||
'title': 'Gloriajw-SpotifyWithErikBernhardsson182',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'url': 'http://pyvideo.org/pygotham-2012/gloriajw-spotifywitherikbernhardsson182m4v.html',
|
||||
'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
|
||||
'info_dict': {
|
||||
'id': '2542',
|
||||
'ext': 'm4v',
|
||||
'title': 'Gloriajw-SpotifyWithErikBernhardsson182.m4v',
|
||||
},
|
||||
]
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
category = mobj.group('category')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
entries = []
|
||||
|
||||
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
|
||||
if m_youtube is not None:
|
||||
return self.url_result(m_youtube.group(1), 'Youtube')
|
||||
data = self._download_json(
|
||||
'https://raw.githubusercontent.com/pyvideo/data/master/%s/videos/%s.json'
|
||||
% (category, video_id), video_id, fatal=False)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<div class="section">\s*<h3(?:\s+class="[^"]*"[^>]*)?>([^>]+?)</h3>',
|
||||
webpage, 'title', flags=re.DOTALL)
|
||||
video_url = self._search_regex(
|
||||
[r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
|
||||
webpage, 'video url', flags=re.DOTALL)
|
||||
if data:
|
||||
for video in data['videos']:
|
||||
video_url = video.get('url')
|
||||
if video_url:
|
||||
if video.get('type') == 'youtube':
|
||||
entries.append(self.url_result(video_url, 'Youtube'))
|
||||
else:
|
||||
entries.append({
|
||||
'id': compat_str(data.get('id') or video_id),
|
||||
'url': video_url,
|
||||
'title': data['title'],
|
||||
'description': data.get('description') or data.get('summary'),
|
||||
'thumbnail': data.get('thumbnail_url'),
|
||||
'duration': int_or_none(data.get('duration')),
|
||||
})
|
||||
else:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._og_search_title(webpage)
|
||||
media_urls = self._search_regex(
|
||||
r'(?s)Media URL:(.+?)</li>', webpage, 'media urls')
|
||||
for m in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<url>http.+?)\1', media_urls):
|
||||
media_url = m.group('url')
|
||||
if re.match(r'https?://www\.youtube\.com/watch\?v=.*', media_url):
|
||||
entries.append(self.url_result(media_url, 'Youtube'))
|
||||
else:
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'url': media_url,
|
||||
'title': title,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': os.path.splitext(title)[0],
|
||||
'url': video_url,
|
||||
}
|
||||
return self.playlist_result(entries, video_id)
|
||||
|
||||
@@ -32,7 +32,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)^(?:https?://)?
|
||||
(?:(?:(?:www\.|m\.)?soundcloud\.com/
|
||||
(?P<uploader>[\w\d-]+)/
|
||||
(?!(?:tracks|sets(?:/[^/?#]+)?|reposts|likes|spotlight)/?(?:$|[?#]))
|
||||
(?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
|
||||
(?P<title>[\w\d-]+)/?
|
||||
(?P<token>[^?]+?)?(?:[?].*)?$)
|
||||
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
|
||||
@@ -265,6 +265,9 @@ class SoundcloudSetIE(SoundcloudIE):
|
||||
'title': 'The Royal Concept EP',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}, {
|
||||
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
59
youtube_dl/extractor/tbs.py
Normal file
59
youtube_dl/extractor/tbs.py
Normal file
@@ -0,0 +1,59 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class TBSIE(TurnerBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/videos/(?:[^/]+/)+(?P<id>[^/?#]+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tbs.com/videos/people-of-earth/season-1/extras/2007318/theatrical-trailer.html',
|
||||
'md5': '9e61d680e2285066ade7199e6408b2ee',
|
||||
'info_dict': {
|
||||
'id': '2007318',
|
||||
'ext': 'mp4',
|
||||
'title': 'Theatrical Trailer',
|
||||
'description': 'Catch the latest comedy from TBS, People of Earth, premiering Halloween night--Monday, October 31, at 9/8c.',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.tntdrama.com/videos/good-behavior/season-1/extras/1538823/you-better-run.html',
|
||||
'md5': 'ce53c6ead5e9f3280b4ad2031a6fab56',
|
||||
'info_dict': {
|
||||
'id': '1538823',
|
||||
'ext': 'mp4',
|
||||
'title': 'You Better Run',
|
||||
'description': 'Letty Raines must figure out what she\'s running toward while running away from her past. Good Behavior premieres November 15 at 9/8c.',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, display_id = re.match(self._VALID_URL, url).groups()
|
||||
site = domain[:3]
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_params = extract_attributes(self._search_regex(r'(<[^>]+id="page-video"[^>]*>)', webpage, 'video params'))
|
||||
if video_params.get('isAuthRequired') == 'true':
|
||||
raise ExtractorError(
|
||||
'This video is only available via cable service provider subscription that'
|
||||
' is not currently supported.', expected=True)
|
||||
query = None
|
||||
clip_id = video_params.get('clipid')
|
||||
if clip_id:
|
||||
query = 'id=' + clip_id
|
||||
else:
|
||||
query = 'titleId=' + video_params['titleid']
|
||||
return self._extract_cvp_info(
|
||||
'http://www.%s.com/service/cvpXml?%s' % (domain, query), display_id, {
|
||||
'default': {
|
||||
'media_src': 'http://ht.cdn.turner.com/%s/big' % site,
|
||||
},
|
||||
'secure': {
|
||||
'media_src': 'http://apple-secure.cdn.turner.com/%s/big' % site,
|
||||
'tokenizer_src': 'http://www.%s.com/video/processors/services/token_ipadAdobe.do' % domain,
|
||||
},
|
||||
})
|
||||
178
youtube_dl/extractor/turner.py
Normal file
178
youtube_dl/extractor/turner.py
Normal file
@@ -0,0 +1,178 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
determine_ext,
|
||||
parse_duration,
|
||||
xpath_attr,
|
||||
update_url_query,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class TurnerBaseIE(InfoExtractor):
|
||||
def _extract_timestamp(self, video_data):
|
||||
return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts'))
|
||||
|
||||
def _extract_cvp_info(self, data_src, video_id, path_data={}):
|
||||
video_data = self._download_xml(data_src, video_id)
|
||||
video_id = video_data.attrib['id']
|
||||
title = xpath_text(video_data, 'headline', fatal=True)
|
||||
# rtmp_src = xpath_text(video_data, 'akamai/src')
|
||||
# if rtmp_src:
|
||||
# splited_rtmp_src = rtmp_src.split(',')
|
||||
# if len(splited_rtmp_src) == 2:
|
||||
# rtmp_src = splited_rtmp_src[1]
|
||||
# aifp = xpath_text(video_data, 'akamai/aifp', default='')
|
||||
|
||||
tokens = {}
|
||||
urls = []
|
||||
formats = []
|
||||
rex = re.compile(
|
||||
r'(?P<width>[0-9]+)x(?P<height>[0-9]+)(?:_(?P<bitrate>[0-9]+))?')
|
||||
# Possible formats locations: files/file, files/groupFiles/files
|
||||
# and maybe others
|
||||
for video_file in video_data.findall('.//file'):
|
||||
video_url = video_file.text.strip()
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url)
|
||||
if video_url.startswith('/mp4:protected/'):
|
||||
continue
|
||||
# TODO Correct extraction for these files
|
||||
# protected_path_data = path_data.get('protected')
|
||||
# if not protected_path_data or not rtmp_src:
|
||||
# continue
|
||||
# protected_path = self._search_regex(
|
||||
# r'/mp4:(.+)\.[a-z0-9]', video_url, 'secure path')
|
||||
# auth = self._download_webpage(
|
||||
# protected_path_data['tokenizer_src'], query={
|
||||
# 'path': protected_path,
|
||||
# 'videoId': video_id,
|
||||
# 'aifp': aifp,
|
||||
# })
|
||||
# token = xpath_text(auth, 'token')
|
||||
# if not token:
|
||||
# continue
|
||||
# video_url = rtmp_src + video_url + '?' + token
|
||||
elif video_url.startswith('/secure/'):
|
||||
secure_path_data = path_data.get('secure')
|
||||
if not secure_path_data:
|
||||
continue
|
||||
video_url = secure_path_data['media_src'] + video_url
|
||||
secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
|
||||
token = tokens.get(secure_path)
|
||||
if not token:
|
||||
auth = self._download_xml(
|
||||
secure_path_data['tokenizer_src'], video_id, query={
|
||||
'path': secure_path,
|
||||
'videoId': video_id,
|
||||
})
|
||||
token = xpath_text(auth, 'token')
|
||||
if not token:
|
||||
continue
|
||||
tokens[secure_path] = token
|
||||
video_url = video_url + '?hdnea=' + token
|
||||
elif not re.match('https?://', video_url):
|
||||
base_path_data = path_data.get(ext, path_data.get('default', {}))
|
||||
media_src = base_path_data.get('media_src')
|
||||
if not media_src:
|
||||
continue
|
||||
video_url = media_src + video_url
|
||||
if video_url in urls:
|
||||
continue
|
||||
urls.append(video_url)
|
||||
format_id = video_file.get('bitrate')
|
||||
if ext == 'smil':
|
||||
formats.extend(self._extract_smil_formats(
|
||||
video_url, video_id, fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id=format_id or 'hls',
|
||||
fatal=False)
|
||||
if m3u8_formats:
|
||||
# Sometimes final URLs inside m3u8 are unsigned, let's fix this
|
||||
# ourselves
|
||||
qs = compat_urlparse.urlparse(video_url).query
|
||||
if qs:
|
||||
query = compat_urlparse.parse_qs(qs)
|
||||
for m3u8_format in m3u8_formats:
|
||||
m3u8_format['url'] = update_url_query(m3u8_format['url'], query)
|
||||
m3u8_format['extra_param_to_segment_url'] = qs
|
||||
formats.extend(m3u8_formats)
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(video_url, {'hdcore': '3.7.0'}),
|
||||
video_id, f4m_id=format_id or 'hds', fatal=False))
|
||||
else:
|
||||
f = {
|
||||
'format_id': format_id,
|
||||
'url': video_url,
|
||||
'ext': ext,
|
||||
}
|
||||
mobj = rex.search(format_id + video_url)
|
||||
if mobj:
|
||||
f.update({
|
||||
'width': int(mobj.group('width')),
|
||||
'height': int(mobj.group('height')),
|
||||
'tbr': int_or_none(mobj.group('bitrate')),
|
||||
})
|
||||
elif isinstance(format_id, compat_str):
|
||||
if format_id.isdigit():
|
||||
f['tbr'] = int(format_id)
|
||||
else:
|
||||
mobj = re.match(r'ios_(audio|[0-9]+)$', format_id)
|
||||
if mobj:
|
||||
if mobj.group(1) == 'audio':
|
||||
f.update({
|
||||
'vcodec': 'none',
|
||||
'ext': 'm4a',
|
||||
})
|
||||
else:
|
||||
f['tbr'] = int(mobj.group(1))
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for source in video_data.findall('closedCaptions/source'):
|
||||
for track in source.findall('track'):
|
||||
track_url = track.get('url')
|
||||
if not isinstance(track_url, compat_str) or track_url.endswith('/big'):
|
||||
continue
|
||||
lang = track.get('lang') or track.get('label') or 'en'
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': track_url,
|
||||
'ext': {
|
||||
'scc': 'scc',
|
||||
'webvtt': 'vtt',
|
||||
'smptett': 'tt',
|
||||
}.get(source.get('format'))
|
||||
})
|
||||
|
||||
thumbnails = [{
|
||||
'id': image.get('cut'),
|
||||
'url': image.text,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
} for image in video_data.findall('images/image')]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnails': thumbnails,
|
||||
'description': xpath_text(video_data, 'description'),
|
||||
'duration': parse_duration(xpath_text(video_data, 'length') or xpath_text(video_data, 'trt')),
|
||||
'timestamp': self._extract_timestamp(video_data),
|
||||
'upload_date': xpath_attr(video_data, 'metas', 'version'),
|
||||
'series': xpath_text(video_data, 'showTitle'),
|
||||
'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
|
||||
'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
|
||||
}
|
||||
@@ -33,9 +33,7 @@ class UplynkIE(InfoExtractor):
|
||||
formats = self._extract_m3u8_formats('http://content.uplynk.com/%s.m3u8' % path, display_id, 'mp4')
|
||||
if session_id:
|
||||
for f in formats:
|
||||
f['extra_param_to_segment_url'] = {
|
||||
'pbs': session_id,
|
||||
}
|
||||
f['extra_param_to_segment_url'] = 'pbs=' + session_id
|
||||
self._sort_formats(formats)
|
||||
asset = self._download_json('http://content.uplynk.com/player/assetinfo/%s.json' % path, display_id)
|
||||
if asset.get('error') == 1:
|
||||
|
||||
@@ -6,7 +6,7 @@ from ..utils import unescapeHTML
|
||||
|
||||
|
||||
class VODPlatformIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vod-platform\.net/embed/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?vod-platform\.net/[eE]mbed/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
# from http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar
|
||||
'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw',
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.08.28'
|
||||
__version__ = '2016.08.31'
|
||||
|
||||
Reference in New Issue
Block a user