mirror of
https://gitlab.com/ytdl-org/youtube-dl.git
synced 2026-01-24 00:00:10 -05:00
Compare commits
32 Commits
2016.08.31
...
2016.09.03
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
86c3bbbced | ||
|
|
4b3a607658 | ||
|
|
3a7d35b982 | ||
|
|
6496ccb413 | ||
|
|
3fcce30289 | ||
|
|
c2b2c7e138 | ||
|
|
dacb3a864a | ||
|
|
6066d03db0 | ||
|
|
6562d34a8c | ||
|
|
5e9e3d0f6b | ||
|
|
349fc5c705 | ||
|
|
2c3e0af93e | ||
|
|
6150502e47 | ||
|
|
b207d5ebd4 | ||
|
|
4191779dcd | ||
|
|
f97ec8bcb9 | ||
|
|
8276d3b87a | ||
|
|
af95ee94b4 | ||
|
|
8fb6af6bba | ||
|
|
f6af0f888b | ||
|
|
e816c9d158 | ||
|
|
9250181f37 | ||
|
|
f096ec2625 | ||
|
|
4c8ab6fd71 | ||
|
|
05d4612947 | ||
|
|
746a695b36 | ||
|
|
165c54e97d | ||
|
|
2896dd73bc | ||
|
|
f8fd510eb4 | ||
|
|
7a3e849f6e | ||
|
|
196c6ba067 | ||
|
|
165620e320 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.31*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.31**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.03*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.03**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.08.31
|
||||
[debug] youtube-dl version 2016.09.03
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
1
AUTHORS
1
AUTHORS
@@ -182,3 +182,4 @@ Rob van Bekkum
|
||||
Petr Zvoníček
|
||||
Pratyush Singh
|
||||
Aleksander Nitecki
|
||||
Sebastian Blunt
|
||||
|
||||
31
ChangeLog
31
ChangeLog
@@ -1,3 +1,34 @@
|
||||
version 2016.09.03
|
||||
|
||||
Core
|
||||
* Restore usage of NAME attribute from EXT-X-MEDIA tag for format codes in
|
||||
_extract_m3u8_formats (#10522)
|
||||
* Handle semicolon in mimetype2ext
|
||||
|
||||
|
||||
Extractors
|
||||
+ [youtube] Add support for rental videos' previews (#10532)
|
||||
* [youtube:playlist] Fallback to video extraction for video/playlist URLs when
|
||||
no playlist is actually served (#10537)
|
||||
+ [drtv] Add support for dr.dk/nyheder (#10536)
|
||||
+ [facebook:plugins:video] Add extractor (#10530)
|
||||
+ [go] Add extractor for *.go.com sites
|
||||
* [adobepass] Check for authz_token expiration (#10527)
|
||||
* [nytimes] improve extraction
|
||||
* [thestar] Fix extraction (#10465)
|
||||
* [glide] Fix extraction (#10478)
|
||||
- [exfm] Remove extractor (#10482)
|
||||
* [youporn] Fix categories and tags extraction (#10521)
|
||||
+ [curiositystream] Add extractor for app.curiositystream.com
|
||||
- [thvideo] Remove extractor (#10464)
|
||||
* [movingimage] Fix for the new site name (#10466)
|
||||
+ [cbs] Add support for once formats (#10515)
|
||||
* [limelight] Skip ism and duplicate manifests
|
||||
+ [porncom] Extract categories and tags (#10510)
|
||||
+ [facebook] Extract timestamp (#10508)
|
||||
+ [yahoo] Extract more formats
|
||||
|
||||
|
||||
version 2016.08.31
|
||||
|
||||
Extractors
|
||||
|
||||
@@ -171,6 +171,8 @@
|
||||
- **CTVNews**
|
||||
- **culturebox.francetvinfo.fr**
|
||||
- **CultureUnplugged**
|
||||
- **curiositystream**
|
||||
- **curiositystream:collection**
|
||||
- **CWTV**
|
||||
- **DailyMail**
|
||||
- **dailymotion**
|
||||
@@ -223,11 +225,11 @@
|
||||
- **EsriVideo**
|
||||
- **Europa**
|
||||
- **EveryonesMixtape**
|
||||
- **exfm**: ex.fm
|
||||
- **ExpoTV**
|
||||
- **ExtremeTube**
|
||||
- **EyedoTV**
|
||||
- **facebook**
|
||||
- **FacebookPluginsVideo**
|
||||
- **faz.net**
|
||||
- **fc2**
|
||||
- **Fczenit**
|
||||
@@ -271,6 +273,7 @@
|
||||
- **Glide**: Glide mobile video messages (glide.me)
|
||||
- **Globo**
|
||||
- **GloboArticle**
|
||||
- **Go**
|
||||
- **GodTube**
|
||||
- **GodTV**
|
||||
- **Golem**
|
||||
@@ -406,6 +409,7 @@
|
||||
- **MovieClips**
|
||||
- **MovieFap**
|
||||
- **Moviezine**
|
||||
- **MovingImage**
|
||||
- **MPORA**
|
||||
- **MSN**
|
||||
- **mtg**: MTG services
|
||||
@@ -659,7 +663,6 @@
|
||||
- **sr:mediathek**: Saarländischer Rundfunk
|
||||
- **SRGSSR**
|
||||
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
|
||||
- **SSA**
|
||||
- **stanfordoc**: Stanford Open ClassRoom
|
||||
- **Steam**
|
||||
- **Stitcher**
|
||||
@@ -702,8 +705,6 @@
|
||||
- **TheStar**
|
||||
- **ThisAmericanLife**
|
||||
- **ThisAV**
|
||||
- **THVideo**
|
||||
- **THVideoPlaylist**
|
||||
- **tinypic**: tinypic.com videos
|
||||
- **tlc.de**
|
||||
- **TMZ**
|
||||
|
||||
@@ -39,6 +39,7 @@ from youtube_dl.utils import (
|
||||
is_html,
|
||||
js_to_json,
|
||||
limit_length,
|
||||
mimetype2ext,
|
||||
ohdave_rsa_encrypt,
|
||||
OnDemandPagedList,
|
||||
orderedSet,
|
||||
@@ -625,6 +626,14 @@ class TestUtil(unittest.TestCase):
|
||||
limit_length('foo bar baz asd', 12).startswith('foo bar'))
|
||||
self.assertTrue('...' in limit_length('foo bar baz asd', 12))
|
||||
|
||||
def test_mimetype2ext(self):
    # Each pair is (value passed to mimetype2ext, extension it must return).
    # The last two entries carry a ";charset=..." parameter, once without
    # and once with a space after the semicolon.
    expected_extensions = (
        (None, None),
        ('video/x-flv', 'flv'),
        ('application/x-mpegURL', 'm3u8'),
        ('text/vtt', 'vtt'),
        ('text/vtt;charset=utf-8', 'vtt'),
        ('text/html; charset=utf-8', 'html'),
    )
    for mimetype, extension in expected_extensions:
        self.assertEqual(mimetype2ext(mimetype), extension)
|
||||
|
||||
def test_parse_codecs(self):
|
||||
self.assertEqual(parse_codecs(''), {})
|
||||
self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), {
|
||||
|
||||
@@ -37,6 +37,10 @@ class AdobePassIE(InfoExtractor):
|
||||
return self._search_regex(
|
||||
'<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)
|
||||
|
||||
def is_expired(token, date_ele):
    # Read the text of the `date_ele` element from the token XML and drop
    # any "GMT" marker preceded by "_" or a space before parsing it.
    expiry_text = re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))
    token_expires = unified_timestamp(expiry_text)
    # A falsy parse result is returned unchanged (treated as "not expired"
    # by the callers' truth tests); otherwise compare against the clock.
    return token_expires and token_expires <= int(time.time())
|
||||
|
||||
mvpd_headers = {
|
||||
'ap_42': 'anonymous',
|
||||
'ap_11': 'Linux i686',
|
||||
@@ -47,11 +51,8 @@ class AdobePassIE(InfoExtractor):
|
||||
guid = xml_text(resource, 'guid')
|
||||
requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {}
|
||||
authn_token = requestor_info.get('authn_token')
|
||||
if authn_token:
|
||||
token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(authn_token, 'simpleTokenExpires')))
|
||||
if token_expires and token_expires <= int(time.time()):
|
||||
authn_token = None
|
||||
requestor_info = {}
|
||||
if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
|
||||
authn_token = None
|
||||
if not authn_token:
|
||||
# TODO add support for other TV Providers
|
||||
mso_id = 'DTV'
|
||||
@@ -98,6 +99,8 @@ class AdobePassIE(InfoExtractor):
|
||||
self._downloader.cache.store('mvpd', requestor_id, requestor_info)
|
||||
|
||||
authz_token = requestor_info.get(guid)
|
||||
if authz_token and is_expired(authz_token, 'simpleTokenTTL'):
|
||||
authz_token = None
|
||||
if not authz_token:
|
||||
authorize = self._download_webpage(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
|
||||
|
||||
@@ -51,7 +51,7 @@ class CBSIE(CBSBaseIE):
|
||||
path = 'dJ5BDC/media/guid/2198311517/' + guid
|
||||
smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
|
||||
formats, subtitles = self._extract_theplatform_smil(smil_url + '&manifest=m3u', guid)
|
||||
for r in ('HLS&formats=M3U', 'RTMP', 'WIFI', '3G'):
|
||||
for r in ('OnceURL&formats=M3U', 'HLS&formats=M3U', 'RTMP', 'WIFI', '3G'):
|
||||
try:
|
||||
tp_formats, _ = self._extract_theplatform_smil(smil_url + '&assetTypes=' + r, guid, 'Downloading %s SMIL data' % r.split('&')[0])
|
||||
formats.extend(tp_formats)
|
||||
|
||||
@@ -1201,7 +1201,8 @@ class InfoExtractor(object):
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
}]
|
||||
last_info = None
|
||||
last_info = {}
|
||||
last_media = {}
|
||||
for line in m3u8_doc.splitlines():
|
||||
if line.startswith('#EXT-X-STREAM-INF:'):
|
||||
last_info = parse_m3u8_attributes(line)
|
||||
@@ -1224,23 +1225,24 @@ class InfoExtractor(object):
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
})
|
||||
else:
|
||||
# When there is no URI in EXT-X-MEDIA let this tag's
|
||||
# data be used by regular URI lines below
|
||||
last_media = media
|
||||
elif line.startswith('#') or not line.strip():
|
||||
continue
|
||||
else:
|
||||
if last_info is None:
|
||||
formats.append({'url': format_url(line)})
|
||||
continue
|
||||
tbr = int_or_none(last_info.get('AVERAGE-BANDWIDTH') or last_info.get('BANDWIDTH'), scale=1000)
|
||||
format_id = []
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
# Despite specification does not mention NAME attribute for
|
||||
# EXT-X-STREAM-INF it still sometimes may be present
|
||||
stream_name = last_info.get('NAME') or last_media.get('NAME')
|
||||
# Bandwidth of live streams may differ over time thus making
|
||||
# format_id unpredictable. So it's better to keep provided
|
||||
# format_id intact.
|
||||
if not live:
|
||||
# Despite specification does not mention NAME attribute for
|
||||
# EXT-X-STREAM-INF it still sometimes may be present
|
||||
stream_name = last_info.get('NAME')
|
||||
format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
@@ -1269,6 +1271,7 @@ class InfoExtractor(object):
|
||||
f.update(parse_codecs(last_info.get('CODECS')))
|
||||
formats.append(f)
|
||||
last_info = {}
|
||||
last_media = {}
|
||||
return formats
|
||||
|
||||
@staticmethod
|
||||
|
||||
120
youtube_dl/extractor/curiositystream.py
Normal file
120
youtube_dl/extractor/curiositystream.py
Normal file
@@ -0,0 +1,120 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
compat_str,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class CuriosityStreamBaseIE(InfoExtractor):
    # Shared API plumbing for the CuriosityStream extractors: error
    # reporting, optional login and conversion of a media record into an
    # info dict that is handed over to the Limelight extractor.
    _NETRC_MACHINE = 'curiositystream'
    _auth_token = None
    _API_BASE_URL = 'https://api.curiositystream.com/v1/'

    def _handle_errors(self, result):
        # Raise an expected ExtractorError when the API response carries an
        # error message; do nothing otherwise.
        message = result.get('error', {}).get('message')
        if not message:
            return
        if isinstance(message, dict):
            # A dict of messages is flattened into one comma separated string
            message = ', '.join(message.values())
        raise ExtractorError(
            '%s said: %s' % (self.IE_NAME, message), expected=True)

    def _call_api(self, path, video_id):
        # Fetch `path` below the API base URL and return the 'data' member
        # of the JSON response. The auth token is sent only when one is set.
        headers = {'X-Auth-Token': self._auth_token} if self._auth_token else {}
        response = self._download_json(
            self._API_BASE_URL + path, video_id, headers=headers)
        self._handle_errors(response)
        return response['data']

    def _real_initialize(self):
        # Log in when credentials are available and remember the auth token
        # for subsequent API calls.
        email, password = self._get_login_info()
        if email is None:
            return
        login_form = urlencode_postdata({
            'email': email,
            'password': password,
        })
        response = self._download_json(
            self._API_BASE_URL + 'login', None, data=login_form)
        self._handle_errors(response)
        self._auth_token = response['message']['auth_token']

    def _extract_media_info(self, media):
        # Build a url_transparent result for one media record. 'id',
        # 'limelight_media_id' and 'title' are mandatory keys.
        video_id = compat_str(media['id'])
        limelight_media_id = media['limelight_media_id']
        title = media['title']

        subtitles = {}
        for caption in media.get('closed_captions', []):
            caption_url = caption.get('file')
            if caption_url:
                # Prefer the language code, then the language name, then 'en'
                lang = caption.get('code') or caption.get('language') or 'en'
                subtitles.setdefault(lang, []).append({'url': caption_url})

        # Largest available image wins
        thumbnail = media.get('image_large') or media.get('image_medium') or media.get('image_small')

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': 'limelight:media:' + limelight_media_id,
            'title': title,
            'description': media.get('description'),
            'thumbnail': thumbnail,
            'duration': int_or_none(media.get('duration')),
            'tags': media.get('tags'),
            'subtitles': subtitles,
            'ie_key': 'LimelightMedia',
        }
|
||||
|
||||
|
||||
class CuriosityStreamIE(CuriosityStreamBaseIE):
    # Single video pages: https://app.curiositystream.com/video/<numeric id>
    IE_NAME = 'curiositystream'
    _VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
    _TEST = {
        'url': 'https://app.curiositystream.com/video/2',
        'md5': 'a0074c190e6cddaf86900b28d3e9ee7a',
        'info_dict': {
            'id': '2',
            'ext': 'mp4',
            'title': 'How Did You Develop The Internet?',
            'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
            'timestamp': 1448388615,
            'upload_date': '20151124',
        }
    }

    def _real_extract(self, url):
        # The numeric id from the URL doubles as the API media id
        media_id = self._match_id(url)
        return self._extract_media_info(
            self._call_api('media/' + media_id, media_id))
|
||||
|
||||
|
||||
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
    # Collection pages: https://app.curiositystream.com/collection/<numeric id>
    IE_NAME = 'curiositystream:collection'
    _VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P<id>\d+)'
    _TEST = {
        'url': 'https://app.curiositystream.com/collection/2',
        'info_dict': {
            'id': '2',
            'title': 'Curious Minds: The Internet',
            'description': 'How is the internet shaping our lives in the 21st Century?',
        },
        'playlist_mincount': 17,
    }

    def _real_extract(self, url):
        collection_id = self._match_id(url)
        collection = self._call_api(
            'collections/' + collection_id, collection_id)
        # One url_transparent entry per media record of the collection
        entries = [
            self._extract_media_info(media)
            for media in collection.get('media', [])]
        return self.playlist_result(
            entries, collection_id,
            collection.get('title'), collection.get('description'))
|
||||
@@ -4,26 +4,45 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class DRTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5',
|
||||
'md5': 'dc515a9ab50577fa14cc4e4b0265168f',
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
|
||||
'md5': '25e659cccc9a2ed956110a299fdf5983',
|
||||
'info_dict': {
|
||||
'id': 'panisk-paske-5',
|
||||
'id': 'klassen-darlig-taber-10',
|
||||
'ext': 'mp4',
|
||||
'title': 'Panisk Påske (5)',
|
||||
'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c',
|
||||
'timestamp': 1426984612,
|
||||
'upload_date': '20150322',
|
||||
'duration': 1455,
|
||||
'title': 'Klassen - Dårlig taber (10)',
|
||||
'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
|
||||
'timestamp': 1471991907,
|
||||
'upload_date': '20160823',
|
||||
'duration': 606.84,
|
||||
},
|
||||
}
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
|
||||
'md5': '2c37175c718155930f939ef59952474a',
|
||||
'info_dict': {
|
||||
'id': 'christiania-pusher-street-ryddes-drdkrjpo',
|
||||
'ext': 'mp4',
|
||||
'title': 'LIVE Christianias rydning af Pusher Street er i gang',
|
||||
'description': '- Det er det fedeste, der er sket i 20 år, fortæller christianit til DR Nyheder.',
|
||||
'timestamp': 1472800279,
|
||||
'upload_date': '20160902',
|
||||
'duration': 131.4,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -35,7 +54,8 @@ class DRTVIE(InfoExtractor):
|
||||
'Video %s is not available' % video_id, expected=True)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-(?:material-identifier|episode-slug)="([^"]+)"',
|
||||
(r'data-(?:material-identifier|episode-slug)="([^"]+)"',
|
||||
r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
|
||||
webpage, 'video id')
|
||||
|
||||
programcard = self._download_json(
|
||||
@@ -43,9 +63,12 @@ class DRTVIE(InfoExtractor):
|
||||
video_id, 'Downloading video JSON')
|
||||
data = programcard['Data'][0]
|
||||
|
||||
title = data['Title']
|
||||
description = data['Description']
|
||||
timestamp = parse_iso8601(data['CreatedTime'])
|
||||
title = remove_end(self._og_search_title(
|
||||
webpage, default=None), ' | TV | DR') or data['Title']
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or data.get('Description')
|
||||
|
||||
timestamp = parse_iso8601(data.get('CreatedTime'))
|
||||
|
||||
thumbnail = None
|
||||
duration = None
|
||||
@@ -56,16 +79,18 @@ class DRTVIE(InfoExtractor):
|
||||
subtitles = {}
|
||||
|
||||
for asset in data['Assets']:
|
||||
if asset['Kind'] == 'Image':
|
||||
thumbnail = asset['Uri']
|
||||
elif asset['Kind'] == 'VideoResource':
|
||||
duration = asset['DurationInMilliseconds'] / 1000.0
|
||||
restricted_to_denmark = asset['RestrictedToDenmark']
|
||||
spoken_subtitles = asset['Target'] == 'SpokenSubtitles'
|
||||
for link in asset['Links']:
|
||||
uri = link['Uri']
|
||||
target = link['Target']
|
||||
format_id = target
|
||||
if asset.get('Kind') == 'Image':
|
||||
thumbnail = asset.get('Uri')
|
||||
elif asset.get('Kind') == 'VideoResource':
|
||||
duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
|
||||
restricted_to_denmark = asset.get('RestrictedToDenmark')
|
||||
spoken_subtitles = asset.get('Target') == 'SpokenSubtitles'
|
||||
for link in asset.get('Links', []):
|
||||
uri = link.get('Uri')
|
||||
if not uri:
|
||||
continue
|
||||
target = link.get('Target')
|
||||
format_id = target or ''
|
||||
preference = None
|
||||
if spoken_subtitles:
|
||||
preference = -1
|
||||
@@ -76,8 +101,8 @@ class DRTVIE(InfoExtractor):
|
||||
video_id, preference, f4m_id=format_id))
|
||||
elif target == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
uri, video_id, 'mp4', preference=preference,
|
||||
m3u8_id=format_id))
|
||||
uri, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
preference=preference, m3u8_id=format_id))
|
||||
else:
|
||||
bitrate = link.get('Bitrate')
|
||||
if bitrate:
|
||||
@@ -85,7 +110,7 @@ class DRTVIE(InfoExtractor):
|
||||
formats.append({
|
||||
'url': uri,
|
||||
'format_id': format_id,
|
||||
'tbr': bitrate,
|
||||
'tbr': int_or_none(bitrate),
|
||||
'ext': link.get('FileFormat'),
|
||||
})
|
||||
subtitles_list = asset.get('SubtitlesList')
|
||||
@@ -94,12 +119,18 @@ class DRTVIE(InfoExtractor):
|
||||
'Danish': 'da',
|
||||
}
|
||||
for subs in subtitles_list:
|
||||
lang = subs['Language']
|
||||
subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}]
|
||||
if not subs.get('Uri'):
|
||||
continue
|
||||
lang = subs.get('Language') or 'da'
|
||||
subtitles.setdefault(LANGS.get(lang, lang), []).append({
|
||||
'url': subs['Uri'],
|
||||
'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
|
||||
})
|
||||
|
||||
if not formats and restricted_to_denmark:
|
||||
raise ExtractorError(
|
||||
'Unfortunately, DR is not allowed to show this program outside Denmark.', expected=True)
|
||||
self.raise_geo_restricted(
|
||||
'Unfortunately, DR is not allowed to show this program outside Denmark.',
|
||||
expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -1,58 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ExfmIE(InfoExtractor):
    # ex.fm song pages. Songs whose stream URL points at the SoundCloud API
    # are delegated to the Soundcloud extractor.
    IE_NAME = 'exfm'
    IE_DESC = 'ex.fm'
    _VALID_URL = r'https?://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)'
    _SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
    _TESTS = [
        {
            'url': 'http://ex.fm/song/eh359',
            'md5': 'e45513df5631e6d760970b14cc0c11e7',
            'info_dict': {
                'id': '44216187',
                'ext': 'mp3',
                'title': 'Test House "Love Is Not Enough" (Extended Mix) DeadJournalist Exclusive',
                'uploader': 'deadjournalist',
                'upload_date': '20120424',
                'description': 'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive',
            },
            'note': 'Soundcloud song',
            'skip': 'The site is down too often',
        },
        {
            'url': 'http://ex.fm/song/wddt8',
            'md5': '966bd70741ac5b8570d8e45bfaed3643',
            'info_dict': {
                'id': 'wddt8',
                'ext': 'mp3',
                'title': 'Safe and Sound',
                'uploader': 'Capital Cities',
            },
            'skip': 'The site is down too often',
        },
    ]

    def _real_extract(self, url):
        song_id = re.match(self._VALID_URL, url).group('id')
        song = self._download_json(
            'http://ex.fm/api/v3/song/%s' % song_id, song_id)['song']
        stream_url = song['url']

        if re.match(self._SOUNDCLOUD_URL, stream_url) is None:
            # Directly hosted song: describe it from the API record
            return {
                'id': song_id,
                'url': stream_url,
                'ext': 'mp3',
                'title': song['title'],
                'thumbnail': song['image']['large'],
                'uploader': song['artist'],
                'view_count': song['loved_count'],
            }

        # SoundCloud stream: strip the '/stream' part and hand the URL over
        self.to_screen('Soundcloud song detected')
        return self.url_result(stream_url.replace('/stream', ''), 'Soundcloud')
|
||||
@@ -194,6 +194,10 @@ from .ctsnews import CtsNewsIE
|
||||
from .ctv import CTVIE
|
||||
from .ctvnews import CTVNewsIE
|
||||
from .cultureunplugged import CultureUnpluggedIE
|
||||
from .curiositystream import (
|
||||
CuriosityStreamIE,
|
||||
CuriosityStreamCollectionIE,
|
||||
)
|
||||
from .cwtv import CWTVIE
|
||||
from .dailymail import DailyMailIE
|
||||
from .dailymotion import (
|
||||
@@ -257,11 +261,13 @@ from .espn import ESPNIE
|
||||
from .esri import EsriVideoIE
|
||||
from .europa import EuropaIE
|
||||
from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .exfm import ExfmIE
|
||||
from .expotv import ExpoTVIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
from .eyedotv import EyedoTVIE
|
||||
from .facebook import FacebookIE
|
||||
from .facebook import (
|
||||
FacebookIE,
|
||||
FacebookPluginsVideoIE,
|
||||
)
|
||||
from .faz import FazIE
|
||||
from .fc2 import FC2IE
|
||||
from .fczenit import FczenitIE
|
||||
@@ -315,6 +321,7 @@ from .globo import (
|
||||
GloboIE,
|
||||
GloboArticleIE,
|
||||
)
|
||||
from .go import GoIE
|
||||
from .godtube import GodTubeIE
|
||||
from .godtv import GodTVIE
|
||||
from .golem import GolemIE
|
||||
@@ -486,6 +493,7 @@ from .motherless import MotherlessIE
|
||||
from .motorsport import MotorsportIE
|
||||
from .movieclips import MovieClipsIE
|
||||
from .moviezine import MoviezineIE
|
||||
from .movingimage import MovingImageIE
|
||||
from .msn import MSNIE
|
||||
from .mtv import (
|
||||
MTVIE,
|
||||
@@ -806,7 +814,6 @@ from .srgssr import (
|
||||
SRGSSRPlayIE,
|
||||
)
|
||||
from .srmediathek import SRMediathekIE
|
||||
from .ssa import SSAIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .steam import SteamIE
|
||||
from .streamable import StreamableIE
|
||||
@@ -869,10 +876,6 @@ from .tnaflix import (
|
||||
MovieFapIE,
|
||||
)
|
||||
from .toggle import ToggleIE
|
||||
from .thvideo import (
|
||||
THVideoIE,
|
||||
THVideoPlaylistIE
|
||||
)
|
||||
from .toutv import TouTvIE
|
||||
from .toypics import ToypicsUserIE, ToypicsIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
|
||||
@@ -15,6 +15,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
error_to_compat_str,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
limit_length,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
@@ -62,6 +63,8 @@ class FacebookIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
|
||||
'uploader': 'Tennis on Facebook',
|
||||
'upload_date': '20140908',
|
||||
'timestamp': 1410199200,
|
||||
}
|
||||
}, {
|
||||
'note': 'Video without discernible title',
|
||||
@@ -71,6 +74,8 @@ class FacebookIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #274175099429670',
|
||||
'uploader': 'Asif Nawab Butt',
|
||||
'upload_date': '20140506',
|
||||
'timestamp': 1399398998,
|
||||
},
|
||||
'expected_warnings': [
|
||||
'title'
|
||||
@@ -78,12 +83,14 @@ class FacebookIE(InfoExtractor):
|
||||
}, {
|
||||
'note': 'Video with DASH manifest',
|
||||
'url': 'https://www.facebook.com/video.php?v=957955867617029',
|
||||
'md5': '54706e4db4f5ad58fbad82dde1f1213f',
|
||||
'md5': 'b2c28d528273b323abe5c6ab59f0f030',
|
||||
'info_dict': {
|
||||
'id': '957955867617029',
|
||||
'ext': 'mp4',
|
||||
'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
|
||||
'uploader': 'Demy de Zeeuw',
|
||||
'upload_date': '20160110',
|
||||
'timestamp': 1452431627,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
|
||||
@@ -306,12 +313,16 @@ class FacebookIE(InfoExtractor):
|
||||
if not video_title:
|
||||
video_title = 'Facebook video #%s' % video_id
|
||||
uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||
'timestamp', default=None))
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'formats': formats,
|
||||
'uploader': uploader,
|
||||
'timestamp': timestamp,
|
||||
}
|
||||
|
||||
return webpage, info_dict
|
||||
@@ -340,3 +351,32 @@ class FacebookIE(InfoExtractor):
|
||||
self._VIDEO_PAGE_TEMPLATE % video_id,
|
||||
video_id, fatal_if_no_video=True)
|
||||
return info_dict
|
||||
|
||||
|
||||
class FacebookPluginsVideoIE(InfoExtractor):
    # Embedded player URLs (plugins/video.php). The wrapped video URL is
    # carried in the "href" query parameter, possibly percent-encoded.
    _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)'

    _TESTS = [{
        'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F&show_text=0&width=560',
        'md5': '5954e92cdfe51fe5782ae9bda7058a07',
        'info_dict': {
            'id': '10154383743583686',
            'ext': 'mp4',
            'title': 'What to do during the haze?',
            'uploader': 'Gov.sg',
            'upload_date': '20160826',
            'timestamp': 1472184808,
        },
        'add_ie': [FacebookIE.ie_key()],
    }, {
        'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fvideo.php%3Fv%3D10204634152394104',
        'only_matching': True,
    }, {
        'url': 'https://www.facebook.com/plugins/video.php?href=https://www.facebook.com/gov.sg/videos/10154383743583686/&show_text=0&width=560',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Everything captured after "href=" is unquoted and passed on to the
        # regular Facebook extractor.
        wrapped_url = compat_urllib_parse_unquote(self._match_id(url))
        return self.url_result(wrapped_url, FacebookIE.ie_key())
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class GlideIE(InfoExtractor):
|
||||
@@ -14,10 +13,8 @@ class GlideIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'UZF8zlmuQbe4mr+7dCiQ0w==',
|
||||
'ext': 'mp4',
|
||||
'title': 'Damon Timm\'s Glide message',
|
||||
'title': "Damon's Glide message",
|
||||
'thumbnail': 're:^https?://.*?\.cloudfront\.net/.*\.jpg$',
|
||||
'uploader': 'Damon Timm',
|
||||
'upload_date': '20140919',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,7 +24,8 @@ class GlideIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.+?)</title>', webpage, 'title')
|
||||
r'<title>(.+?)</title>', webpage,
|
||||
'title', default=None) or self._og_search_title(webpage)
|
||||
video_url = self._proto_relative_url(self._search_regex(
|
||||
r'<source[^>]+src=(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'video URL', default=None,
|
||||
@@ -36,18 +34,10 @@ class GlideIE(InfoExtractor):
|
||||
r'<img[^>]+id=["\']video-thumbnail["\'][^>]+src=(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'thumbnail url', default=None,
|
||||
group='url')) or self._og_search_thumbnail(webpage)
|
||||
uploader = self._search_regex(
|
||||
r'<div[^>]+class=["\']info-name["\'][^>]*>([^<]+)',
|
||||
webpage, 'uploader', fatal=False)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'<div[^>]+class="info-date"[^>]*>([^<]+)',
|
||||
webpage, 'upload date', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
101
youtube_dl/extractor/go.py
Normal file
101
youtube_dl/extractor/go.py
Normal file
@@ -0,0 +1,101 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
determine_ext,
|
||||
parse_age_limit,
|
||||
)
|
||||
|
||||
|
||||
class GoIE(InfoExtractor):
    """Extract videos from the go.com brand sites (e.g. abc.go.com).

    The video id is the part following ``vdka`` in the page URL; it is looked
    up through the content API together with the code of the brand
    sub-domain the URL belongs to.
    """

    # Brand sub-domain -> code interpolated into the content API URL.
    _BRANDS = {
        'abc': '001',
        'freeform': '002',
        'watchdisneychannel': '004',
        'watchdisneyjunior': '008',
        'watchdisneyxd': '009',
    }
    _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/.*?vdka(?P<id>\w+)' % '|'.join(_BRANDS.keys())
    _TESTS = [{
        'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
        'info_dict': {
            'id': '0_g86w5onx',
            'ext': 'mp4',
            'title': 'Sneak Peek: Language Arts',
            'description': 'md5:7dcdab3b2d17e5217c953256af964e9c',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video referenced by ``url``.

        Raises ExtractorError when the URL carries no known brand
        sub-domain, since the content API URL cannot be built without it.
        """
        sub_domain, video_id = re.match(self._VALID_URL, url).groups()

        # The sub-domain group is optional in _VALID_URL, so it is None for a
        # bare go.com URL; fail with a readable error instead of a KeyError.
        brand = self._BRANDS.get(sub_domain)
        if not brand:
            raise ExtractorError(
                'Unable to determine the brand of %s' % url, expected=True)

        video_data = self._download_json(
            'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id),
            video_id)['video'][0]
        title = video_data['title']

        def nested(key):
            # Treat a missing or null sub-object as an empty one so the
            # chained lookups below never hit None.
            return video_data.get(key) or {}

        formats = []
        for asset in nested('assets').get('asset') or []:
            asset_url = asset.get('value')
            if not asset_url:
                continue
            format_id = asset.get('format')
            ext = determine_ext(asset_url)
            if ext == 'm3u8':
                # HLS manifests expand into several formats; a broken
                # manifest is skipped rather than aborting the extraction.
                formats.extend(self._extract_m3u8_formats(
                    asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False))
            else:
                formats.append({
                    'format_id': format_id,
                    'url': asset_url,
                    'ext': ext,
                })
        self._sort_formats(formats)

        subtitles = {}
        for cc in nested('closedcaption').get('src') or []:
            cc_url = cc.get('value')
            if not cc_url:
                continue
            ext = determine_ext(cc_url)
            # Caption files served with an .xml extension are labelled ttml.
            if ext == 'xml':
                ext = 'ttml'
            subtitles.setdefault(cc.get('lang'), []).append({
                'url': cc_url,
                'ext': ext,
            })

        thumbnails = []
        for thumbnail in nested('thumbnails').get('thumbnail') or []:
            thumbnail_url = thumbnail.get('value')
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(thumbnail.get('width')),
                'height': int_or_none(thumbnail.get('height')),
            })

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('longdescription') or video_data.get('description'),
            # Scaled down by 1000 - presumably the API reports milliseconds.
            'duration': int_or_none(nested('duration').get('value'), 1000),
            'age_limit': parse_age_limit(nested('tvrating').get('rating')),
            'episode_number': int_or_none(video_data.get('episodenumber')),
            'series': nested('show').get('title'),
            'season_number': int_or_none(nested('season').get('num')),
            'thumbnails': thumbnails,
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||
@@ -34,11 +34,12 @@ class LimelightBaseIE(InfoExtractor):
|
||||
def _extract_info(self, streams, mobile_urls, properties):
|
||||
video_id = properties['media_id']
|
||||
formats = []
|
||||
|
||||
urls = []
|
||||
for stream in streams:
|
||||
stream_url = stream.get('url')
|
||||
if not stream_url or stream.get('drmProtected'):
|
||||
if not stream_url or stream.get('drmProtected') or stream_url in urls:
|
||||
continue
|
||||
urls.append(stream_url)
|
||||
ext = determine_ext(stream_url)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
@@ -58,9 +59,11 @@ class LimelightBaseIE(InfoExtractor):
|
||||
format_id = 'rtmp'
|
||||
if stream.get('videoBitRate'):
|
||||
format_id += '-%d' % int_or_none(stream['videoBitRate'])
|
||||
http_url = 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:])
|
||||
urls.append(http_url)
|
||||
http_fmt = fmt.copy()
|
||||
http_fmt.update({
|
||||
'url': 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:]),
|
||||
'url': http_url,
|
||||
'format_id': format_id.replace('rtmp', 'http'),
|
||||
})
|
||||
formats.append(http_fmt)
|
||||
@@ -76,8 +79,9 @@ class LimelightBaseIE(InfoExtractor):
|
||||
for mobile_url in mobile_urls:
|
||||
media_url = mobile_url.get('mobileUrl')
|
||||
format_id = mobile_url.get('targetMediaPlatform')
|
||||
if not media_url or format_id == 'Widevine':
|
||||
if not media_url or format_id in ('Widevine', 'SmoothStreaming') or media_url in urls:
|
||||
continue
|
||||
urls.append(media_url)
|
||||
ext = determine_ext(media_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
|
||||
@@ -7,22 +7,19 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class SSAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://ssa\.nls\.uk/film/(?P<id>\d+)'
|
||||
class MovingImageIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://movingimage\.nls\.uk/film/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://ssa.nls.uk/film/3561',
|
||||
'url': 'http://movingimage.nls.uk/film/3561',
|
||||
'md5': '4caa05c2b38453e6f862197571a7be2f',
|
||||
'info_dict': {
|
||||
'id': '3561',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'SHETLAND WOOL',
|
||||
'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
|
||||
'duration': 900,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -30,10 +27,9 @@ class SSAIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
streamer = self._search_regex(
|
||||
r"'streamer'\s*,\S*'(rtmp[^']+)'", webpage, 'streamer')
|
||||
play_path = self._search_regex(
|
||||
r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0]
|
||||
formats = self._extract_m3u8_formats(
|
||||
self._html_search_regex(r'file\s*:\s*"([^"]+)"', webpage, 'm3u8 manifest URL'),
|
||||
video_id, ext='mp4', entry_protocol='m3u8_native')
|
||||
|
||||
def search_field(field_name, fatal=False):
|
||||
return self._search_regex(
|
||||
@@ -44,13 +40,11 @@ class SSAIE(InfoExtractor):
|
||||
description = unescapeHTML(search_field('Description'))
|
||||
duration = parse_duration(search_field('Running time'))
|
||||
thumbnail = self._search_regex(
|
||||
r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnails', fatal=False)
|
||||
r"image\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': streamer,
|
||||
'play_path': play_path,
|
||||
'ext': 'flv',
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
@@ -13,7 +13,7 @@ class MyVidsterIE(InfoExtractor):
|
||||
'id': '3685814',
|
||||
'title': 'md5:7d8427d6d02c4fbcef50fe269980c749',
|
||||
'upload_date': '20141027',
|
||||
'uploader_id': 'utkualp',
|
||||
'uploader': 'utkualp',
|
||||
'ext': 'mp4',
|
||||
'age_limit': 18,
|
||||
},
|
||||
|
||||
@@ -1,26 +1,37 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hmac
|
||||
import hashlib
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
mimetype2ext,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class NYTimesBaseIE(InfoExtractor):
|
||||
_SECRET = b'pX(2MbU2);4N{7J8)>YwKRJ+/pQ3JkiU2Q^V>mFYv6g6gYvt6v'
|
||||
|
||||
def _extract_video_from_id(self, video_id):
|
||||
video_data = self._download_json(
|
||||
'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
# Authorization generation algorithm is reverse engineered from `signer` in
|
||||
# http://graphics8.nytimes.com/video/vhs/vhs-2.x.min.js
|
||||
path = '/svc/video/api/v3/video/' + video_id
|
||||
hm = hmac.new(self._SECRET, (path + ':vhs').encode(), hashlib.sha512).hexdigest()
|
||||
video_data = self._download_json('http://www.nytimes.com' + path, video_id, 'Downloading video JSON', headers={
|
||||
'Authorization': 'NYTV ' + base64.b64encode(hm.encode()).decode(),
|
||||
'X-NYTV': 'vhs',
|
||||
}, fatal=False)
|
||||
if not video_data:
|
||||
video_data = self._download_json(
|
||||
'http://www.nytimes.com/svc/video/api/v2/video/' + video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
title = video_data['headline']
|
||||
description = video_data.get('summary')
|
||||
duration = float_or_none(video_data.get('duration'), 1000)
|
||||
|
||||
uploader = video_data.get('byline')
|
||||
publication_date = video_data.get('publication_date')
|
||||
timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None
|
||||
|
||||
def get_file_size(file_size):
|
||||
if isinstance(file_size, int):
|
||||
@@ -28,35 +39,59 @@ class NYTimesBaseIE(InfoExtractor):
|
||||
elif isinstance(file_size, dict):
|
||||
return int(file_size.get('value', 0))
|
||||
else:
|
||||
return 0
|
||||
return None
|
||||
|
||||
formats = [
|
||||
{
|
||||
'url': video['url'],
|
||||
'format_id': video.get('type'),
|
||||
'vcodec': video.get('video_codec'),
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
'filesize': get_file_size(video.get('fileSize')),
|
||||
} for video in video_data['renditions'] if video.get('url')
|
||||
]
|
||||
urls = []
|
||||
formats = []
|
||||
for video in video_data.get('renditions', []):
|
||||
video_url = video.get('url')
|
||||
format_id = video.get('type')
|
||||
if not video_url or format_id == 'thumbs' or video_url in urls:
|
||||
continue
|
||||
urls.append(video_url)
|
||||
ext = mimetype2ext(video.get('mimetype')) or determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=format_id or 'hls', fatal=False))
|
||||
elif ext == 'mpd':
|
||||
continue
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# video_url, video_id, format_id or 'dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'vcodec': video.get('videoencoding') or video.get('video_codec'),
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
'filesize': get_file_size(video.get('file_size') or video.get('fileSize')),
|
||||
'tbr': int_or_none(video.get('bitrate'), 1000),
|
||||
'ext': ext,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = [
|
||||
{
|
||||
'url': 'http://www.nytimes.com/%s' % image['url'],
|
||||
thumbnails = []
|
||||
for image in video_data.get('images', []):
|
||||
image_url = image.get('url')
|
||||
if not image_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': 'http://www.nytimes.com/' + image_url,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
} for image in video_data.get('images', []) if image.get('url')
|
||||
]
|
||||
})
|
||||
|
||||
publication_date = video_data.get('publication_date')
|
||||
timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'description': video_data.get('summary'),
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'duration': duration,
|
||||
'uploader': video_data.get('byline'),
|
||||
'duration': float_or_none(video_data.get('duration'), 1000),
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
@@ -67,7 +102,7 @@ class NYTimesIE(NYTimesBaseIE):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
|
||||
'md5': '18a525a510f942ada2720db5f31644c0',
|
||||
'md5': 'd665342765db043f7e225cff19df0f2d',
|
||||
'info_dict': {
|
||||
'id': '100000002847155',
|
||||
'ext': 'mov',
|
||||
|
||||
@@ -26,6 +26,8 @@ class PornComIE(InfoExtractor):
|
||||
'duration': 551,
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
'categories': list,
|
||||
'tags': list,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067',
|
||||
@@ -75,7 +77,14 @@ class PornComIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage, 'view count'))
|
||||
r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage,
|
||||
'view count', fatal=False))
|
||||
|
||||
def extract_list(kind):
|
||||
s = self._search_regex(
|
||||
r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize(),
|
||||
webpage, kind, fatal=False)
|
||||
return re.findall(r'<a[^>]+>([^<]+)</a>', s or '')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -86,4 +95,6 @@ class PornComIE(InfoExtractor):
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
'age_limit': 18,
|
||||
'categories': extract_list('categories'),
|
||||
'tags': extract_list('tags'),
|
||||
}
|
||||
|
||||
@@ -35,6 +35,7 @@ class SouthParkEsIE(SouthParkIE):
|
||||
'description': 'Cartman Consigue Una Sonda Anal',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
'skip': 'Geo-restricted',
|
||||
}]
|
||||
|
||||
|
||||
|
||||
@@ -2,8 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveLegacyIE
|
||||
from ..compat import compat_parse_qs
|
||||
|
||||
|
||||
class TheStarIE(InfoExtractor):
|
||||
@@ -30,6 +28,9 @@ class TheStarIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
|
||||
brightcove_id = compat_parse_qs(brightcove_legacy_url)['@videoPlayer'][0]
|
||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
||||
brightcove_id = self._search_regex(
|
||||
r'mainartBrightcoveVideoId["\']?\s*:\s*["\']?(\d+)',
|
||||
webpage, 'brightcove id')
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
'BrightcoveNew', brightcove_id)
|
||||
|
||||
@@ -1,84 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate
|
||||
)
|
||||
|
||||
|
||||
class THVideoIE(InfoExtractor):
    """Extractor for single thvideo.tv videos.

    The media URL is read from the mobile player page, while the metadata
    (title, thumbnail, description, upload date) comes from the regular
    video page.
    """

    _VALID_URL = r'https?://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://thvideo.tv/v/th1987/',
        'md5': 'fa107b1f73817e325e9433505a70db50',
        'info_dict': {
            'id': '1987',
            'ext': 'mp4',
            'title': '【动画】秘封活动记录 ~ The Sealed Esoteric History.分镜稿预览',
            'display_id': 'th1987',
            'thumbnail': 'http://thvideo.tv/uploadfile/2014/0722/20140722013459856.jpg',
            'description': '社团京都幻想剧团的第一个东方二次同人动画作品「秘封活动记录 ~ The Sealed Esoteric History.」 本视频是该动画第一期的分镜草稿...',
            'upload_date': '20140722'
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video referenced by ``url``."""
        video_id = self._match_id(url)

        # The direct download link is only exposed by the mobile player page.
        player_page = self._download_webpage(
            'http://thvideo.tv/mobile.php?cid=%s-0' % video_id,
            video_id, note='Downloading video source page')
        media_url = self._html_search_regex(
            r'<source src="(.*?)" type', player_page, 'video url')

        # Everything else is scraped from the regular video page.
        main_page = self._download_webpage(
            'http://thvideo.tv/v/th%s' % video_id, video_id)

        info = {
            'id': video_id,
            'ext': 'mp4',
            'url': media_url,
        }
        info['title'] = self._og_search_title(main_page)
        info['display_id'] = 'th%s' % video_id
        info['thumbnail'] = self._og_search_thumbnail(main_page)
        info['description'] = self._og_search_description(main_page)
        published = self._html_search_regex(
            r'span itemprop="datePublished" content="(.*?)">', main_page,
            'upload date', fatal=False)
        info['upload_date'] = unified_strdate(published)
        return info
|
||||
|
||||
|
||||
class THVideoPlaylistIE(InfoExtractor):
    """Extractor for thvideo.tv "mylist" playlists.

    Every video linked from the playlist page is delegated to the THVideo
    extractor.
    """

    # The scheme used to be written ``http?``, which makes the "p" optional
    # and therefore never matches https URLs.
    _VALID_URL = r'https?://(?:www\.)?thvideo\.tv/mylist(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://thvideo.tv/mylist2',
        'info_dict': {
            'id': '2',
            'title': '幻想万華鏡',
        },
        'playlist_mincount': 23,
    }

    def _real_extract(self, url):
        """Return a playlist result listing the videos of the mylist page."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)
        # The title is optional: a missing one only produces a warning.
        list_title = self._html_search_regex(
            r'<h1 class="show_title">(.*?)<b id', webpage, 'playlist title',
            fatal=False)

        # Collect the numeric ids of all linked videos, in page order.
        video_ids = re.findall(
            r'<dd><a href="http://thvideo\.tv/v/th(\d+)/" target=', webpage)
        entries = [
            self.url_result('http://thvideo.tv/v/th' + video_id, 'THVideo')
            for video_id in video_ids]

        return self.playlist_result(entries, playlist_id, list_title)
|
||||
@@ -8,7 +8,6 @@ import re
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -17,6 +16,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
@@ -39,7 +39,7 @@ class YahooIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
|
||||
'md5': 'c3466d2b6d5dd6b9f41ba9ed04c24b23',
|
||||
'md5': '251af144a19ebc4a033e8ba91ac726bb',
|
||||
'info_dict': {
|
||||
'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9',
|
||||
'ext': 'mp4',
|
||||
@@ -50,7 +50,7 @@ class YahooIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
|
||||
'md5': '75ffabdb87c16d4ffe8c036dc4d1c136',
|
||||
'md5': '7993e572fac98e044588d0b5260f4352',
|
||||
'info_dict': {
|
||||
'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',
|
||||
'ext': 'mp4',
|
||||
@@ -61,7 +61,7 @@ class YahooIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'https://tw.news.yahoo.com/%E6%95%A2%E5%95%8F%E5%B8%82%E9%95%B7%20%E9%BB%83%E7%A7%80%E9%9C%9C%E6%89%B9%E8%B3%B4%E6%B8%85%E5%BE%B7%20%E9%9D%9E%E5%B8%B8%E9%AB%98%E5%82%B2-034024051.html',
|
||||
'md5': '9035d38f88b1782682a3e89f985be5bb',
|
||||
'md5': '45c024bad51e63e9b6f6fad7a43a8c23',
|
||||
'info_dict': {
|
||||
'id': 'cac903b3-fcf4-3c14-b632-643ab541712f',
|
||||
'ext': 'mp4',
|
||||
@@ -72,10 +72,10 @@ class YahooIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html',
|
||||
'md5': '0b51660361f0e27c9789e7037ef76f4b',
|
||||
'md5': '71298482f7c64cbb7fa064e4553ff1c1',
|
||||
'info_dict': {
|
||||
'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58',
|
||||
'ext': 'mp4',
|
||||
'ext': 'webm',
|
||||
'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder',
|
||||
'description': 'md5:f66c890e1490f4910a9953c941dee944',
|
||||
'duration': 97,
|
||||
@@ -98,7 +98,7 @@ class YahooIE(InfoExtractor):
|
||||
'id': '154609075',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'f8e336c6b66f503282e5f719641d6565',
|
||||
'md5': '000887d0dc609bc3a47c974151a40fb8',
|
||||
'info_dict': {
|
||||
'id': 'e624c4bc-3389-34de-9dfc-025f74943409',
|
||||
'ext': 'mp4',
|
||||
@@ -107,7 +107,7 @@ class YahooIE(InfoExtractor):
|
||||
'duration': 30,
|
||||
},
|
||||
}, {
|
||||
'md5': '958bcb90b4d6df71c56312137ee1cd5a',
|
||||
'md5': '81bc74faf10750fe36e4542f9a184c66',
|
||||
'info_dict': {
|
||||
'id': '1fc8ada0-718e-3abe-a450-bf31f246d1a9',
|
||||
'ext': 'mp4',
|
||||
@@ -139,7 +139,7 @@ class YahooIE(InfoExtractor):
|
||||
'skip': 'Domain name in.lifestyle.yahoo.com gone',
|
||||
}, {
|
||||
'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html',
|
||||
'md5': 'b17ac378b1134fa44370fb27db09a744',
|
||||
'md5': '2a9752f74cb898af5d1083ea9f661b58',
|
||||
'info_dict': {
|
||||
'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
|
||||
'ext': 'mp4',
|
||||
@@ -168,7 +168,7 @@ class YahooIE(InfoExtractor):
|
||||
}, {
|
||||
# Query result is embedded in webpage, but explicit request to video API fails with geo restriction
|
||||
'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html',
|
||||
'md5': '1ddbf7c850777548438e5c4f147c7b8c',
|
||||
'md5': '4fbafb9c9b6f07aa8f870629f6671b35',
|
||||
'info_dict': {
|
||||
'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504',
|
||||
'ext': 'mp4',
|
||||
@@ -196,6 +196,7 @@ class YahooIE(InfoExtractor):
|
||||
'description': 'Galactic',
|
||||
'title': 'Dolla Diva (feat. Maggie Koerner)',
|
||||
},
|
||||
'skip': 'redirect to https://www.yahoo.com/music',
|
||||
},
|
||||
]
|
||||
|
||||
@@ -213,15 +214,7 @@ class YahooIE(InfoExtractor):
|
||||
entries = []
|
||||
iframe_urls = re.findall(r'<iframe[^>]+src="(/video/.+?-\d+\.html\?format=embed.*?)"', webpage)
|
||||
for idx, iframe_url in enumerate(iframe_urls):
|
||||
iframepage = self._download_webpage(
|
||||
host + iframe_url, display_id,
|
||||
note='Downloading iframe webpage for video #%d' % idx)
|
||||
items_json = self._search_regex(
|
||||
r'mediaItems: (\[.+?\])$', iframepage, 'items', flags=re.MULTILINE, default=None)
|
||||
if items_json:
|
||||
items = json.loads(items_json)
|
||||
video_id = items[0]['id']
|
||||
entries.append(self._get_info(video_id, display_id, webpage))
|
||||
entries.append(self.url_result(host + iframe_url, 'Yahoo'))
|
||||
if entries:
|
||||
return self.playlist_result(entries, page_id)
|
||||
|
||||
@@ -246,7 +239,9 @@ class YahooIE(InfoExtractor):
|
||||
if config:
|
||||
sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi')
|
||||
if sapi and 'query' in sapi:
|
||||
return self._extract_info(display_id, sapi, webpage)
|
||||
info = self._extract_info(display_id, sapi, webpage)
|
||||
self._sort_formats(info['formats'])
|
||||
return info
|
||||
|
||||
items_json = self._search_regex(
|
||||
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
|
||||
@@ -292,15 +287,17 @@ class YahooIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for s in info['streams']:
|
||||
tbr = int_or_none(s.get('bitrate'))
|
||||
format_info = {
|
||||
'width': int_or_none(s.get('width')),
|
||||
'height': int_or_none(s.get('height')),
|
||||
'tbr': int_or_none(s.get('bitrate')),
|
||||
'tbr': tbr,
|
||||
}
|
||||
|
||||
host = s['host']
|
||||
path = s['path']
|
||||
if host.startswith('rtmp'):
|
||||
fmt = 'rtmp'
|
||||
format_info.update({
|
||||
'url': host,
|
||||
'play_path': path,
|
||||
@@ -308,14 +305,18 @@ class YahooIE(InfoExtractor):
|
||||
})
|
||||
else:
|
||||
if s.get('format') == 'm3u8_playlist':
|
||||
format_info['protocol'] = 'm3u8_native'
|
||||
format_info['ext'] = 'mp4'
|
||||
fmt = 'hls'
|
||||
format_info.update({
|
||||
'protocol': 'm3u8_native',
|
||||
'ext': 'mp4',
|
||||
})
|
||||
else:
|
||||
fmt = format_info['ext'] = determine_ext(path)
|
||||
format_url = compat_urlparse.urljoin(host, path)
|
||||
format_info['url'] = format_url
|
||||
format_info['format_id'] = fmt + ('-%d' % tbr if tbr else '')
|
||||
formats.append(format_info)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
closed_captions = self._html_search_regex(
|
||||
r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions',
|
||||
default='[]')
|
||||
@@ -346,17 +347,25 @@ class YahooIE(InfoExtractor):
|
||||
def _get_info(self, video_id, display_id, webpage):
|
||||
region = self._search_regex(
|
||||
r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"',
|
||||
webpage, 'region', fatal=False, default='US')
|
||||
data = compat_urllib_parse_urlencode({
|
||||
'protocol': 'http',
|
||||
'region': region.upper(),
|
||||
})
|
||||
query_url = (
|
||||
'https://video.media.yql.yahoo.com/v1/video/sapi/streams/'
|
||||
'{id}?{data}'.format(id=video_id, data=data))
|
||||
query_result = self._download_json(
|
||||
query_url, display_id, 'Downloading video info')
|
||||
return self._extract_info(display_id, query_result, webpage)
|
||||
webpage, 'region', fatal=False, default='US').upper()
|
||||
formats = []
|
||||
info = {}
|
||||
for fmt in ('webm', 'mp4'):
|
||||
query_result = self._download_json(
|
||||
'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' + video_id,
|
||||
display_id, 'Downloading %s video info' % fmt, query={
|
||||
'protocol': 'http',
|
||||
'region': region,
|
||||
'format': fmt,
|
||||
})
|
||||
info = self._extract_info(display_id, query_result, webpage)
|
||||
formats.extend(info['formats'])
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
'http://video.media.yql.yahoo.com/v1/hls/%s?region=%s' % (video_id, region),
|
||||
video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
return info
|
||||
|
||||
|
||||
class YahooSearchIE(SearchInfoExtractor):
|
||||
|
||||
@@ -35,7 +35,7 @@ class YouPornIE(InfoExtractor):
|
||||
'age_limit': 18,
|
||||
},
|
||||
}, {
|
||||
# Anonymous User uploader
|
||||
# Unknown uploader
|
||||
'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
|
||||
'info_dict': {
|
||||
'id': '561726',
|
||||
@@ -44,7 +44,7 @@ class YouPornIE(InfoExtractor):
|
||||
'title': 'Big Tits Awesome Brunette On amazing webcam show',
|
||||
'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'Anonymous User',
|
||||
'uploader': 'Unknown',
|
||||
'upload_date': '20111125',
|
||||
'average_rating': int,
|
||||
'view_count': int,
|
||||
@@ -140,17 +140,17 @@ class YouPornIE(InfoExtractor):
|
||||
r'>All [Cc]omments? \(([\d,.]+)\)',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
def extract_tag_box(title):
|
||||
tag_box = self._search_regex(
|
||||
(r'<div[^>]+class=["\']tagBoxTitle["\'][^>]*>\s*%s\b.*?</div>\s*'
|
||||
'<div[^>]+class=["\']tagBoxContent["\']>(.+?)</div>') % re.escape(title),
|
||||
webpage, '%s tag box' % title, default=None)
|
||||
def extract_tag_box(regex, title):
|
||||
tag_box = self._search_regex(regex, webpage, title, default=None)
|
||||
if not tag_box:
|
||||
return []
|
||||
return re.findall(r'<a[^>]+href=[^>]+>([^<]+)', tag_box)
|
||||
|
||||
categories = extract_tag_box('Category')
|
||||
tags = extract_tag_box('Tags')
|
||||
categories = extract_tag_box(
|
||||
r'(?s)Categories:.*?</[^>]+>(.+?)</div>', 'categories')
|
||||
tags = extract_tag_box(
|
||||
r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>',
|
||||
'tags')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -844,6 +844,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
|
||||
'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# Rental video preview
|
||||
'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
|
||||
'info_dict': {
|
||||
'id': 'uGpuVWrhIzE',
|
||||
'ext': 'mp4',
|
||||
'title': 'Piku - Trailer',
|
||||
'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
|
||||
'upload_date': '20150811',
|
||||
'uploader': 'FlixMatrix',
|
||||
'uploader_id': 'FlixMatrixKaravan',
|
||||
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
|
||||
'license': 'Standard YouTube License',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
@@ -1254,6 +1272,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# Convert to the same format returned by compat_parse_qs
|
||||
video_info = dict((k, [v]) for k, v in args.items())
|
||||
add_dash_mpd(video_info)
|
||||
# Rental video is not rented but preview is available (e.g.
|
||||
# https://www.youtube.com/watch?v=yYr8q0y5Jfg,
|
||||
# https://github.com/rg3/youtube-dl/issues/10532)
|
||||
if not video_info and args.get('ypc_vid'):
|
||||
return self.url_result(
|
||||
args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
|
||||
if args.get('livestream') == '1' or args.get('live_playback') == 1:
|
||||
is_live = True
|
||||
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
|
||||
@@ -1841,6 +1865,28 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
||||
'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
|
||||
},
|
||||
'playlist_mincout': 21,
|
||||
}, {
|
||||
# Playlist URL that does not actually serve a playlist
|
||||
'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
|
||||
'info_dict': {
|
||||
'id': 'FqZTN594JQw',
|
||||
'ext': 'webm',
|
||||
'title': "Smiley's People 01 detective, Adventure Series, Action",
|
||||
'uploader': 'STREEM',
|
||||
'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
|
||||
'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
|
||||
'upload_date': '20150526',
|
||||
'license': 'Standard YouTube License',
|
||||
'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
|
||||
'categories': ['People & Blogs'],
|
||||
'tags': list,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [YoutubeIE.ie_key()],
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
@@ -1901,9 +1947,20 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
||||
|
||||
playlist_title = self._html_search_regex(
|
||||
r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
|
||||
page, 'title')
|
||||
page, 'title', default=None)
|
||||
|
||||
return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title)
|
||||
has_videos = True
|
||||
|
||||
if not playlist_title:
|
||||
try:
|
||||
# Some playlist URLs don't actually serve a playlist (e.g.
|
||||
# https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
|
||||
next(self._entries(page, playlist_id))
|
||||
except StopIteration:
|
||||
has_videos = False
|
||||
|
||||
return has_videos, self.playlist_result(
|
||||
self._entries(page, playlist_id), playlist_id, playlist_title)
|
||||
|
||||
def _check_download_just_video(self, url, playlist_id):
|
||||
# Check if it's a video-specific URL
|
||||
@@ -1912,9 +1969,11 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
||||
video_id = query_dict['v'][0]
|
||||
if self._downloader.params.get('noplaylist'):
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||
return self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||
return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||
else:
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||
return video_id, None
|
||||
return None, None
|
||||
|
||||
def _real_extract(self, url):
|
||||
# Extract playlist id
|
||||
@@ -1923,7 +1982,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
||||
raise ExtractorError('Invalid URL: %s' % url)
|
||||
playlist_id = mobj.group(1) or mobj.group(2)
|
||||
|
||||
video = self._check_download_just_video(url, playlist_id)
|
||||
video_id, video = self._check_download_just_video(url, playlist_id)
|
||||
if video:
|
||||
return video
|
||||
|
||||
@@ -1931,7 +1990,15 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
||||
# Mixes require a custom extraction process
|
||||
return self._extract_mix(playlist_id)
|
||||
|
||||
return self._extract_playlist(playlist_id)
|
||||
has_videos, playlist = self._extract_playlist(playlist_id)
|
||||
if has_videos or not video_id:
|
||||
return playlist
|
||||
|
||||
# Some playlist URLs don't actually serve a playlist (see
|
||||
# https://github.com/rg3/youtube-dl/issues/10537).
|
||||
# Fallback to plain video extraction if there is a video id
|
||||
# along with playlist id.
|
||||
return self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||
|
||||
|
||||
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
|
||||
@@ -2312,7 +2379,8 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE):
|
||||
video = self._check_download_just_video(url, 'WL')
|
||||
if video:
|
||||
return video
|
||||
return self._extract_playlist('WL')
|
||||
_, playlist = self._extract_playlist('WL')
|
||||
return playlist
|
||||
|
||||
|
||||
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
@@ -2148,7 +2148,7 @@ def mimetype2ext(mt):
|
||||
return ext
|
||||
|
||||
_, _, res = mt.rpartition('/')
|
||||
res = res.lower()
|
||||
res = res.split(';')[0].strip().lower()
|
||||
|
||||
return {
|
||||
'3gpp': '3gp',
|
||||
@@ -2168,6 +2168,7 @@ def mimetype2ext(mt):
|
||||
'f4m+xml': 'f4m',
|
||||
'hds+xml': 'f4m',
|
||||
'vnd.ms-sstr+xml': 'ism',
|
||||
'quicktime': 'mov',
|
||||
}.get(res, res)
|
||||
|
||||
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.08.31'
|
||||
__version__ = '2016.09.03'
|
||||
|
||||
Reference in New Issue
Block a user