1
0
mirror of https://gitlab.com/ytdl-org/youtube-dl.git synced 2026-01-24 00:00:10 -05:00

Compare commits

...

27 Commits

Author SHA1 Message Date
Philipp Hagemeister
11c60089a8 release 2016.01.14 2016-01-14 15:43:21 +01:00
Sergey M․
abb893e6e4 [beeg] Update API URL 2016-01-14 19:57:56 +06:00
Sergey M․
4511c1976d [beeg] Fix extraction (Closes #8225) 2016-01-14 19:57:20 +06:00
remitamine
40cf7fcbd2 [tudou] Add support for Albums and Playlists and extract more metadata 2016-01-13 13:29:00 +01:00
Yen Chi Hsuan
cc28492d31 [youtube] Fix acodec and vcodec order
In RFC6381, there's no rule stating that the first part of codecs should
be video and the second part should be audio, while it seems the case
for data reported by YouTube.
2016-01-13 17:05:38 +08:00
Sergey M․
bc0550c262 [pluralsight] Fix new player (Closes #8215) 2016-01-13 08:18:37 +06:00
Sergey M․
b83b782dc4 [downloader/fragment] Move helper data to context dict 2016-01-13 00:00:31 +06:00
Sergey M․
16a348475c [dailymotion] Prefer direct links (Closes #8156) 2016-01-12 23:23:39 +06:00
Sergey M․
709185a264 [downloader/fragment] More smooth calculations
`downloaded_bytes` is now updated on each fragment progress hook invocation
2016-01-12 23:18:38 +06:00
Sergey M․
9cb1a06b6c [downloader/fragment] Remove unused code and fix zero division error 2016-01-12 22:09:38 +06:00
Sergey M․
be27283ef6 [iprima] Mark broken 2016-01-11 22:00:17 +06:00
Sergey M․
b924bfad68 [videott] Mark broken 2016-01-11 21:58:32 +06:00
Sergey M․
192b9a571c [videomega] Mark broken 2016-01-11 21:56:19 +06:00
remitamine
6ec6cb4e95 Revert "fix typos"
This reverts commit 36a0e46c39.
2016-01-10 19:27:22 +01:00
remitamine
36a0e46c39 fix typos 2016-01-10 17:55:41 +01:00
Jakub Wilk
dfb1b1468c Fix typos
Closes #8200.
2016-01-10 17:24:28 +01:00
Jaime Marquínez Ferrándiz
3c91e41614 [downloader/fragment] Don't fail if the 'Content-Length' header is missing
In some dailymotion videos (like http://www.dailymotion.com/video/x3k0dtv from #8156) the segments URLs don't have the 'Content-Length' header and HttpFD sets the 'totat_bytes' field to None, so we also use '0' in that case (since we do different math operations with it).
2016-01-10 14:41:38 +01:00
Jaime Marquínez Ferrándiz
7e8a800f29 [bigflix] Use correct indentation to make flake8 happy 2016-01-10 14:26:27 +01:00
remitamine
2334762b03 [shahid] raise ExtractorError if the video is DRM protected 2016-01-10 07:55:58 +01:00
remitamine
3fc088f8c7 [dcn] extract video ids in season entries 2016-01-10 07:45:41 +01:00
Sergey M․
a9bbd26f1d [bigflix] Improve formats extraction 2016-01-10 10:49:27 +06:00
Sergey M․
6e99d5762a [bigflix] Extract all formats 2016-01-10 10:31:36 +06:00
Sergey M․
15b1c6656f Credit @vickyg3 for bigflix (#8194) 2016-01-10 10:03:56 +06:00
Sergey M
d412794205 Merge pull request #8194 from vickyg3/bigflix_ie
[Bigflix] Add new extractor for bigflix.com
2016-01-10 09:02:18 +05:00
Vignesh Venkat
0a899a1448 [Bigflix] Add new extractor for bigflix.com
Add an IE to support bigflix.com. It uses some sort of silverlight
plugin whose video url is being populated using base64 encoded
flashvars. So it is quite straightforward to extract.
2016-01-09 19:45:58 -08:00
Sergey M․
7a34302e95 [canalc2] Fix extraction (Closes #8191) 2016-01-10 01:37:10 +06:00
Jaime Marquínez Ferrándiz
27783821af [xhamster] Remove unused import 2016-01-09 11:16:23 +01:00
34 changed files with 263 additions and 83 deletions

View File

@@ -151,3 +151,4 @@ Muratcan Simsek
Evan Lu
flatgreen
Brian Foley
Vignesh Venkat

View File

@@ -5,7 +5,7 @@ from __future__ import with_statement, unicode_literals
import datetime
import glob
import io # For Python 2 compatibilty
import io # For Python 2 compatibility
import os
import re

View File

@@ -65,6 +65,7 @@
- **Beeg**
- **BehindKink**
- **Bet**
- **Bigflix**
- **Bild**: Bild.de
- **BiliBili**
- **BleacherReport**
@@ -251,7 +252,7 @@
- **Instagram**
- **instagram:user**: Instagram user profile
- **InternetVideoArchive**
- **IPrima**
- **IPrima** (Currently broken)
- **iqiyi**: 爱奇艺
- **Ir90Tv**
- **ivi**: ivi.ru
@@ -602,7 +603,9 @@
- **TruTube**
- **Tube8**
- **TubiTv**
- **Tudou**
- **tudou**
- **tudou:album**
- **tudou:playlist**
- **Tumblr**
- **tunein:clip**
- **tunein:program**
@@ -655,12 +658,12 @@
- **video.mit.edu**
- **VideoDetective**
- **videofy.me**
- **VideoMega**
- **VideoMega** (Currently broken)
- **videomore**
- **videomore:season**
- **videomore:video**
- **VideoPremium**
- **VideoTt**: video.tt - Your True Tube
- **VideoTt**: video.tt - Your True Tube (Currently broken)
- **videoweed**: VideoWeed
- **Vidme**
- **Vidzi**

View File

@@ -66,7 +66,7 @@ class TestAnnotations(unittest.TestCase):
textTag = a.find('TEXT')
text = textTag.text
self.assertTrue(text in expected) # assertIn only added in python 2.7
# remove the first occurance, there could be more than one annotation with the same text
# remove the first occurrence, there could be more than one annotation with the same text
expected.remove(text)
# We should have seen (and removed) all the expected annotation texts.
self.assertEqual(len(expected), 0, 'Not all expected annotations were found.')

View File

@@ -1312,7 +1312,7 @@ class YoutubeDL(object):
# only set the 'formats' fields if the original info_dict list them
# otherwise we end up with a circular reference, the first (and unique)
# element in the 'formats' field in info_dict is info_dict itself,
# wich can't be exported to json
# which can't be exported to json
info_dict['formats'] = formats
if self.params.get('listformats'):
self.list_formats(info_dict)

View File

@@ -59,37 +59,43 @@ class FragmentFD(FileDownloader):
'filename': ctx['filename'],
'tmpfilename': ctx['tmpfilename'],
}
start = time.time()
ctx['started'] = start
ctx.update({
'started': start,
# Total complete fragments downloaded so far in bytes
'complete_frags_downloaded_bytes': 0,
# Amount of fragment's bytes downloaded by the time of the previous
# frag progress hook invocation
'prev_frag_downloaded_bytes': 0,
})
def frag_progress_hook(s):
if s['status'] not in ('downloading', 'finished'):
return
frag_total_bytes = s.get('total_bytes', 0)
if s['status'] == 'finished':
state['downloaded_bytes'] += frag_total_bytes
state['frag_index'] += 1
frag_total_bytes = s.get('total_bytes') or 0
estimated_size = (
(state['downloaded_bytes'] + frag_total_bytes) /
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
(state['frag_index'] + 1) * total_frags)
time_now = time.time()
state['total_bytes_estimate'] = estimated_size
state['elapsed'] = time_now - start
if s['status'] == 'finished':
progress = self.calc_percent(state['frag_index'], total_frags)
state['frag_index'] += 1
state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
ctx['prev_frag_downloaded_bytes'] = 0
else:
frag_downloaded_bytes = s['downloaded_bytes']
frag_progress = self.calc_percent(frag_downloaded_bytes,
frag_total_bytes)
progress = self.calc_percent(state['frag_index'], total_frags)
progress += frag_progress / float(total_frags)
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
state['eta'] = self.calc_eta(
start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
start, time_now, estimated_size,
state['downloaded_bytes'])
state['speed'] = s.get('speed')
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
self._hook_progress(state)
ctx['dl'].add_progress_hook(frag_progress_hook)

View File

@@ -61,6 +61,7 @@ from .beeg import BeegIE
from .behindkink import BehindKinkIE
from .beatportpro import BeatportProIE
from .bet import BetIE
from .bigflix import BigflixIE
from .bild import BildIE
from .bilibili import BiliBiliIE
from .bleacherreport import (
@@ -722,7 +723,11 @@ from .trilulilu import TriluliluIE
from .trutube import TruTubeIE
from .tube8 import Tube8IE
from .tubitv import TubiTvIE
from .tudou import TudouIE
from .tudou import (
TudouIE,
TudouPlaylistIE,
TudouAlbumIE,
)
from .tumblr import TumblrIE
from .tunein import (
TuneInClipIE,

View File

@@ -34,7 +34,7 @@ class BeegIE(InfoExtractor):
video_id = self._match_id(url)
video = self._download_json(
'http://beeg.com/api/v5/video/%s' % video_id, video_id)
'https://api.beeg.com/api/v5/video/%s' % video_id, video_id)
def split(o, e):
def cut(s, x):
@@ -60,7 +60,7 @@ class BeegIE(InfoExtractor):
def decrypt_url(encrypted_url):
encrypted_url = self._proto_relative_url(
encrypted_url.replace('{DATA_MARKERS}', ''), 'http:')
encrypted_url.replace('{DATA_MARKERS}', ''), 'https:')
key = self._search_regex(
r'/key=(.*?)%2Cend=', encrypted_url, 'key', default=None)
if not key:

View File

@@ -0,0 +1,85 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
class BigflixIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.bigflix.com/Hindi-movies/Action-movies/Singham-Returns/16537',
'md5': 'ec76aa9b1129e2e5b301a474e54fab74',
'info_dict': {
'id': '16537',
'ext': 'mp4',
'title': 'Singham Returns',
'description': 'md5:3d2ba5815f14911d5cc6a501ae0cf65d',
}
}, {
# 2 formats
'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070',
'info_dict': {
'id': '16070',
'ext': 'mp4',
'title': 'Madarasapatinam',
'description': 'md5:63b9b8ed79189c6f0418c26d9a3452ca',
'formats': 'mincount:2',
},
'params': {
'skip_download': True,
}
}, {
# multiple formats
'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(
r'<div[^>]+class=["\']pagetitle["\'][^>]*>(.+?)</div>',
webpage, 'title')
def decode_url(quoted_b64_url):
return base64.b64decode(compat_urllib_parse_unquote(
quoted_b64_url).encode('ascii')).decode('utf-8')
formats = []
for height, encoded_url in re.findall(
r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage):
video_url = decode_url(encoded_url)
f = {
'url': video_url,
'format_id': '%sp' % height,
'height': int(height),
}
if video_url.startswith('rtmp'):
f['ext'] = 'flv'
formats.append(f)
file_url = self._search_regex(
r'file=([^&]+)', webpage, 'video url', default=None)
if file_url:
video_url = decode_url(file_url)
if all(f['url'] != video_url for f in formats):
formats.append({
'url': decode_url(file_url),
})
self._sort_formats(formats)
description = self._html_search_meta('description', webpage)
return {
'id': video_id,
'title': title,
'description': description,
'formats': formats
}

View File

@@ -9,9 +9,9 @@ from ..utils import parse_duration
class Canalc2IE(InfoExtractor):
IE_NAME = 'canalc2.tv'
_VALID_URL = r'https?://(?:www\.)?canalc2\.tv/video/(?P<id>\d+)'
_VALID_URL = r'https?://(?:(?:www\.)?canalc2\.tv/video/|archives-canalc2\.u-strasbg\.fr/video\.asp\?.*\bidVideo=)(?P<id>\d+)'
_TEST = {
_TESTS = [{
'url': 'http://www.canalc2.tv/video/12163',
'md5': '060158428b650f896c542dfbb3d6487f',
'info_dict': {
@@ -23,24 +23,36 @@ class Canalc2IE(InfoExtractor):
'params': {
'skip_download': True, # Requires rtmpdump
}
}
}, {
'url': 'http://archives-canalc2.u-strasbg.fr/video.asp?idVideo=11427&voir=oui',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
r'jwplayer\((["\'])Player\1\)\.setup\({[^}]*file\s*:\s*(["\'])(?P<file>.+?)\2',
webpage, 'video_url', group='file')
formats = [{'url': video_url}]
if video_url.startswith('rtmp://'):
rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+/))(?P<play_path>mp4:.+)$', video_url)
formats[0].update({
'url': rtmp.group('url'),
'ext': 'flv',
'app': rtmp.group('app'),
'play_path': rtmp.group('play_path'),
'page_url': url,
})
webpage = self._download_webpage(
'http://www.canalc2.tv/video/%s' % video_id, video_id)
formats = []
for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage):
if video_url.startswith('rtmp://'):
rtmp = re.search(
r'^(?P<url>rtmp://[^/]+/(?P<app>.+/))(?P<play_path>mp4:.+)$', video_url)
formats.append({
'url': rtmp.group('url'),
'format_id': 'rtmp',
'ext': 'flv',
'app': rtmp.group('app'),
'play_path': rtmp.group('play_path'),
'page_url': url,
})
else:
formats.append({
'url': video_url,
'format_id': 'http',
})
self._sort_formats(formats)
title = self._html_search_regex(
r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.*?)</h3>', webpage, 'title')

View File

@@ -313,9 +313,9 @@ class InfoExtractor(object):
except ExtractorError:
raise
except compat_http_client.IncompleteRead as e:
raise ExtractorError('A network error has occured.', cause=e, expected=True)
raise ExtractorError('A network error has occurred.', cause=e, expected=True)
except (KeyError, StopIteration) as e:
raise ExtractorError('An extractor error has occured.', cause=e)
raise ExtractorError('An extractor error has occurred.', cause=e)
def set_downloader(self, downloader):
"""Sets the downloader for this IE."""

View File

@@ -149,14 +149,15 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
ext = determine_ext(media_url)
if type_ == 'application/x-mpegURL' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
media_url, video_id, 'mp4', preference=-1,
m3u8_id='hls', fatal=False))
elif type_ == 'application/f4m' or ext == 'f4m':
formats.extend(self._extract_f4m_formats(
media_url, video_id, preference=-1, f4m_id='hds', fatal=False))
else:
f = {
'url': media_url,
'format_id': quality,
'format_id': 'http-%s' % quality,
}
m = re.search(r'H264-(?P<width>\d+)x(?P<height>\d+)', media_url)
if m:

View File

@@ -5,7 +5,10 @@ import re
import base64
from .common import InfoExtractor
from ..compat import compat_urllib_parse
from ..compat import (
compat_urllib_parse,
compat_str,
)
from ..utils import (
int_or_none,
parse_iso8601,
@@ -186,7 +189,8 @@ class DCNSeasonIE(InfoExtractor):
entries = []
for video in show['videos']:
video_id = compat_str(video['id'])
entries.append(self.url_result(
'http://www.dcndigital.ae/media/%s' % video['id'], 'DCNVideo'))
'http://www.dcndigital.ae/media/%s' % video_id, 'DCNVideo', video_id))
return self.playlist_result(entries, season_id, title)

View File

@@ -105,7 +105,7 @@ class FacebookIE(InfoExtractor):
login_results, 'login error', default=None, group='error')
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)
self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
self._downloader.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
return
fb_dtsg = self._search_regex(
@@ -126,7 +126,7 @@ class FacebookIE(InfoExtractor):
check_response = self._download_webpage(check_req, None,
note='Confirming login')
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
self._downloader.report_warning('Unable to confirm login, you have to login in your browser and authorize the login.')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning('unable to log in: %s' % error_to_compat_str(err))
return

View File

@@ -487,7 +487,7 @@ class GenericIE(InfoExtractor):
'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
}
},
# Embeded Ustream video
# Embedded Ustream video
{
'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
'md5': '27b99cdb639c9b12a79bca876a073417',
@@ -1644,7 +1644,7 @@ class GenericIE(InfoExtractor):
if myvi_url:
return self.url_result(myvi_url)
# Look for embeded soundcloud player
# Look for embedded soundcloud player
mobj = re.search(
r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
webpage)

View File

@@ -14,6 +14,7 @@ from ..utils import (
class IPrimaIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://play\.iprima\.cz/(?:[^/]+/)*(?P<id>[^?#]+)'
_TESTS = [{

View File

@@ -32,7 +32,7 @@ class IviIE(InfoExtractor):
},
'skip': 'Only works from Russia',
},
# Serial's serie
# Serial's series
{
'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549',
'md5': '221f56b35e3ed815fde2df71032f4b3e',

View File

@@ -17,7 +17,7 @@ class MDRIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+(?P<id>\d+)(?:_.+?)?\.html'
_TESTS = [{
# MDR regularily deletes its videos
# MDR regularly deletes its videos
'url': 'http://www.mdr.de/fakt/video189002.html',
'only_matching': True,
}, {

View File

@@ -100,7 +100,7 @@ class NBCSportsVPlayerIE(InfoExtractor):
class NBCSportsIE(InfoExtractor):
# Does not include https becuase its certificate is invalid
# Does not include https because its certificate is invalid
_VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
_TEST = {

View File

@@ -223,7 +223,7 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
response = self._download_webpage(request_url, playlist_title)
response = self._fix_json(response)
if not response.strip():
self._downloader.report_warning('Got an empty reponse, trying '
self._downloader.report_warning('Got an empty response, trying '
'adding the "newvideos" parameter')
response = self._download_webpage(request_url + '&newvideos=true',
playlist_title)

View File

@@ -37,7 +37,7 @@ class OraTVIE(InfoExtractor):
formats = self._extract_m3u8_formats(
m3u8_url, display_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)
# simular to GameSpotIE
# similar to GameSpotIE
m3u8_path = compat_urlparse.urlparse(m3u8_url).path
QUALITIES_RE = r'((,[a-z]+\d+)+,?)'
available_qualities = self._search_regex(

View File

@@ -232,7 +232,7 @@ class PluralsightIE(PluralsightBaseIE):
# { a = author, cn = clip_id, lc = end, m = name }
return {
'id': clip['clipName'],
'id': clip.get('clipName') or clip['name'],
'title': '%s - %s' % (module['title'], clip['title']),
'duration': int_or_none(clip.get('duration')) or parse_duration(clip.get('formattedDuration')),
'creator': author,

View File

@@ -73,6 +73,9 @@ class ShahidIE(InfoExtractor):
'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-%s.html'
% (video_id, api_vars['type']), video_id, 'Downloading player JSON')
if player.get('drm'):
raise ExtractorError('This video is DRM protected.', expected=True)
formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
video = self._download_json(

View File

@@ -7,7 +7,7 @@ from ..utils import ExtractorError
class TestURLIE(InfoExtractor):
""" Allows adressing of the test cases as test:yout.*be_1 """
""" Allows addressing of the test cases as test:yout.*be_1 """
IE_DESC = False # Do not list
_VALID_URL = r'test(?:url)?:(?P<id>(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?)$'

View File

@@ -4,10 +4,16 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
float_or_none,
unescapeHTML,
)
class TudouIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/([^/]+/)*(?P<id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])'
IE_NAME = 'tudou'
_VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:(?:programs|wlplay)/view|(?:listplay|albumplay)/[\w-]{11})/(?P<id>[\w-]{11})'
_TESTS = [{
'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
'md5': '140a49ed444bd22f93330985d8475fcb',
@@ -16,6 +22,11 @@ class TudouIE(InfoExtractor):
'ext': 'f4v',
'title': '卡马乔国足开大脚长传冲吊集锦',
'thumbnail': 're:^https?://.*\.jpg$',
'timestamp': 1372113489000,
'description': '卡马乔卡家军,开大脚先进战术不完全集锦!',
'duration': 289.04,
'view_count': int,
'filesize': int,
}
}, {
'url': 'http://www.tudou.com/programs/view/ajX3gyhL0pc/',
@@ -24,10 +35,12 @@ class TudouIE(InfoExtractor):
'ext': 'f4v',
'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
'thumbnail': 're:^https?://.*\.jpg$',
'timestamp': 1349207518000,
'description': 'md5:294612423894260f2dcd5c6c04fe248b',
'duration': 5478.33,
'view_count': int,
'filesize': int,
}
}, {
'url': 'http://www.tudou.com/albumplay/cJAHGih4yYg.html',
'only_matching': True,
}]
_PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf'
@@ -42,24 +55,20 @@ class TudouIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
item_data = self._download_json(
'http://www.tudou.com/tvp/getItemInfo.action?ic=%s' % video_id, video_id)
youku_vcode = self._search_regex(
r'vcode\s*:\s*[\'"]([^\'"]*)[\'"]', webpage, 'youku vcode', default=None)
youku_vcode = item_data.get('vcode')
if youku_vcode:
return self.url_result('youku:' + youku_vcode, ie='Youku')
title = self._search_regex(
r',kw\s*:\s*[\'"]([^\'"]+)[\'"]', webpage, 'title')
thumbnail_url = self._search_regex(
r',pic\s*:\s*[\'"]([^\'"]+)[\'"]', webpage, 'thumbnail URL', fatal=False)
title = unescapeHTML(item_data['kw'])
description = item_data.get('desc')
thumbnail_url = item_data.get('pic')
view_count = int_or_none(item_data.get('playTimes'))
timestamp = int_or_none(item_data.get('pt'))
player_url = self._search_regex(
r'playerUrl\s*:\s*[\'"]([^\'"]+\.swf)[\'"]',
webpage, 'player URL', default=self._PLAYER_URL)
segments = self._parse_json(self._search_regex(
r'segs: \'([^\']+)\'', webpage, 'segments'), video_id)
segments = self._parse_json(item_data['itemSegs'], video_id)
# It looks like the keys are the arguments that have to be passed as
# the hd field in the request url, we pick the higher
# Also, filter non-number qualities (see issue #3643).
@@ -80,8 +89,13 @@ class TudouIE(InfoExtractor):
'ext': ext,
'title': title,
'thumbnail': thumbnail_url,
'description': description,
'view_count': view_count,
'timestamp': timestamp,
'duration': float_or_none(part.get('seconds'), 1000),
'filesize': int_or_none(part.get('size')),
'http_headers': {
'Referer': player_url,
'Referer': self._PLAYER_URL,
},
}
result.append(part_info)
@@ -92,3 +106,47 @@ class TudouIE(InfoExtractor):
'id': video_id,
'title': title,
}
class TudouPlaylistIE(InfoExtractor):
IE_NAME = 'tudou:playlist'
_VALID_URL = r'https?://(?:www\.)?tudou\.com/listplay/(?P<id>[\w-]{11})\.html'
_TESTS = [{
'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo.html',
'info_dict': {
'id': 'zzdE77v6Mmo',
},
'playlist_mincount': 209,
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
playlist_data = self._download_json(
'http://www.tudou.com/tvp/plist.action?lcode=%s' % playlist_id, playlist_id)
entries = [self.url_result(
'http://www.tudou.com/programs/view/%s' % item['icode'],
'Tudou', item['icode'],
item['kw']) for item in playlist_data['items']]
return self.playlist_result(entries, playlist_id)
class TudouAlbumIE(InfoExtractor):
IE_NAME = 'tudou:album'
_VALID_URL = r'https?://(?:www\.)?tudou\.com/album(?:cover|play)/(?P<id>[\w-]{11})'
_TESTS = [{
'url': 'http://www.tudou.com/albumplay/v5qckFJvNJg.html',
'info_dict': {
'id': 'v5qckFJvNJg',
},
'playlist_mincount': 45,
}]
def _real_extract(self, url):
album_id = self._match_id(url)
album_data = self._download_json(
'http://www.tudou.com/tvp/alist.action?acode=%s' % album_id, album_id)
entries = [self.url_result(
'http://www.tudou.com/programs/view/%s' % item['icode'],
'Tudou', item['icode'],
item['kw']) for item in album_data['items']]
return self.playlist_result(entries, album_id)

View File

@@ -67,7 +67,7 @@ class TV4IE(InfoExtractor):
info = self._download_json(
'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON')
# If is_geo_restricted is true, it doesn't neceserally mean we can't download it
# If is_geo_restricted is true, it doesn't necessarily mean we can't download it
if info['is_geo_restricted']:
self.report_warning('This content might not be available in your country due to licensing restrictions.')
if info['requires_subscription']:

View File

@@ -8,6 +8,7 @@ from ..utils import sanitized_Request
class VideoMegaIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'(?:videomega:|https?://(?:www\.)?videomega\.tv/(?:(?:view|iframe|cdn)\.php)?\?ref=)(?P<id>[A-Za-z0-9]+)'
_TESTS = [{
'url': 'http://videomega.tv/cdn.php?ref=AOSQBJYKIDDIKYJBQSOA',

View File

@@ -170,7 +170,7 @@ class VideomoreVideoIE(InfoExtractor):
'skip_download': True,
},
}, {
# season single serie with og:video:iframe
# season single series with og:video:iframe
'url': 'http://videomore.ru/poslednii_ment/1_sezon/14_seriya',
'only_matching': True,
}, {

View File

@@ -11,6 +11,7 @@ from ..utils import (
class VideoTtIE(InfoExtractor):
_WORKING = False
ID_NAME = 'video.tt'
IE_DESC = 'video.tt - Your True Tube'
_VALID_URL = r'http://(?:www\.)?video\.tt/(?:(?:video|embed)/|watch_video\.php\?v=)(?P<id>[\da-zA-Z]{9})'

View File

@@ -6,7 +6,6 @@ from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
str_to_int,
unified_strdate,
)

View File

@@ -1487,7 +1487,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if codecs:
codecs = codecs.split(',')
if len(codecs) == 2:
acodec, vcodec = codecs[0], codecs[1]
acodec, vcodec = codecs[1], codecs[0]
else:
acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
dct.update({

View File

@@ -689,7 +689,7 @@ class SWFInterpreter(object):
elif mname in _builtin_classes:
res = _builtin_classes[mname]
else:
# Assume unitialized
# Assume uninitialized
# TODO warn here
res = undefined
stack.append(res)

View File

@@ -984,7 +984,7 @@ def date_from_str(date_str):
if sign == '-':
time = -time
unit = match.group('unit')
# A bad aproximation?
# A bad approximation?
if unit == 'month':
unit = 'day'
time *= 30
@@ -1307,7 +1307,7 @@ def parse_filesize(s):
if s is None:
return None
# The lower-case forms are of course incorrect and inofficial,
# The lower-case forms are of course incorrect and unofficial,
# but we support those too
_UNIT_TABLE = {
'B': 1,

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2016.01.09'
__version__ = '2016.01.14'