1
0
mirror of https://gitlab.com/ytdl-org/youtube-dl.git synced 2026-01-24 00:00:10 -05:00

Compare commits

...

10 Commits

Author SHA1 Message Date
Philipp Hagemeister
e5b9fac281 Bump version number 2011-09-14 22:55:26 +02:00
Philipp Hagemeister
08c1d0d3bc Update README 2011-09-14 22:55:09 +02:00
Anand Babu Periasamy
20e91e8375 Add --match-title and --reject-title (Closes #132) 2011-09-14 22:54:51 +02:00
Philipp Hagemeister
f9c6878714 Support for The Escapist 2011-09-14 22:26:53 +02:00
Philipp Hagemeister
8c5dc3ad40 Simplify IE index 2011-09-14 21:39:41 +02:00
Philipp Hagemeister
1d2e86aed9 Decapitalize options in README for consistency with youtube-dl --help 2011-09-14 21:20:23 +02:00
Philipp Hagemeister
a2f7e3a5bb Clarify usage 2011-09-14 21:19:33 +02:00
Philipp Hagemeister
f2a3a3522c typo in README 2011-09-14 21:18:22 +02:00
Philipp Hagemeister
b487ef0833 Fully implement comedycentral downloader 2011-09-14 21:17:05 +02:00
Philipp Hagemeister
d0922f29a3 Update LATEST_VERSION (oops) 2011-09-14 00:04:46 +02:00
3 changed files with 187 additions and 76 deletions

View File

@@ -1 +1 @@
2011.09.13
2011.09.14

View File

@@ -1,7 +1,7 @@
# youtube-dl
## USAGE
youtube-dl [OPTIONS] URL
youtube-dl [options] url [url...]
## DESCRIPTION
**youtube-dl** is a small command-line program to download videos from
@@ -17,9 +17,15 @@ which means you can modify it, redistribute it or use it however you like.
-i, --ignore-errors continue on download errors
-r, --rate-limit LIMIT download rate limit (e.g. 50k or 44.6m)
-R, --retries RETRIES number of retries (default is 10)
--dump-user-agent display the current browser identification
### Video Selection:
--playlist-start NUMBER playlist video to start at (default is 1)
--playlist-end NUMBER playlist video to end at (default is last)
--dump-user-agent display the current browser identification
--match-title REGEX download only matching titles (regex or caseless
sub-string)
--reject-title REGEX skip download for matching titles (regex or
caseless sub-string)
### Filesystem Options:
-t, --title use title in file name
@@ -68,7 +74,7 @@ which means you can modify it, redistribute it or use it however you like.
Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the -b option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you're interested in. In that case, simply request it with the -f option and youtube-dl will try to download it.
### I get HTTP error 402 when trying to download a video. What''s this?
### I get HTTP error 402 when trying to download a video. What's this?
Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering providing a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a web browser to the YouTube URL, solving the CAPTCHA, and restarting youtube-dl.

View File

@@ -15,7 +15,7 @@ __author__ = (
)
__license__ = 'Public Domain'
__version__ = '2011.09.14'
__version__ = '2011.09.15'
UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
@@ -23,6 +23,7 @@ import cookielib
import datetime
import gzip
import htmlentitydefs
import HTMLParser
import httplib
import locale
import math
@@ -437,6 +438,8 @@ class FileDownloader(object):
noprogress: Do not print the progress bar.
playliststart: Playlist item to start at.
playlistend: Playlist item to end at.
matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles.
logtostderr: Log messages to stderr instead of stdout.
consoletitle: Display progress in console window's titlebar.
nopart: Do not use temporary .part files.
@@ -712,6 +715,17 @@ class FileDownloader(object):
if filename is None:
return
matchtitle=self.params.get('matchtitle',False)
rejecttitle=self.params.get('rejecttitle',False)
title=info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
self.to_screen(u'[download] "%s" title did not match pattern "%s"' % (title, matchtitle))
return
if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
self.to_screen(u'[download] "%s" title matched reject pattern "%s"' % (title, rejecttitle))
return
if self.params.get('nooverwrites', False) and os.path.exists(filename):
self.to_stderr(u'WARNING: file exists and will be skipped')
return
@@ -822,7 +836,7 @@ class FileDownloader(object):
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrupted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = ['rtmpdump'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
while retval == 2 or retval == 1:
prevsize = os.path.getsize(tmpfilename)
@@ -832,6 +846,11 @@ class FileDownloader(object):
cursize = os.path.getsize(tmpfilename)
if prevsize == cursize and retval == 1:
break
# Some rtmp streams seem to abort after ~ 99.8%. Don't complain about those
if prevsize == cursize and retval == 2 and cursize > 1024:
self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
retval = 0
break
if retval == 0:
self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
self.try_rename(tmpfilename, filename)
@@ -3055,6 +3074,9 @@ class ComedyCentralIE(InfoExtractor):
def report_config_download(self, episode_id):
    """Print a status line saying the configuration for episode_id is being downloaded."""
    message = u'[comedycentral] %s: Downloading configuration' % episode_id
    self._downloader.to_screen(message)
def report_index_download(self, episode_id):
    """Print a status line saying the show index for episode_id is being downloaded."""
    message = u'[comedycentral] %s: Downloading show index' % episode_id
    self._downloader.to_screen(message)
def report_player_url(self, episode_id):
    """Print a status line saying the player URL for episode_id is being determined."""
    message = u'[comedycentral] %s: Determining player URL' % episode_id
    self._downloader.to_screen(message)
@@ -3102,36 +3124,38 @@ class ComedyCentralIE(InfoExtractor):
return
epTitle = mobj.group('episode')
mMovieParams = re.findall('<param name="movie" value="(http://media.mtvnservices.com/(.*?:episode:([^:]*):)(.*?))"/>', html)
mMovieParams = re.findall('<param name="movie" value="(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"/>', html)
if len(mMovieParams) == 0:
self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
return
show_id = mMovieParams[0][2]
ACT_COUNT = { # TODO: Detect this dynamically
'thedailyshow.com': 4,
'colbertnation.com': 3,
}.get(show_id, 4)
OFFSET = {
'thedailyshow.com': 1,
'colbertnation.com': 1,
}.get(show_id, 1)
first_player_url = mMovieParams[0][0]
startMediaNum = int(mMovieParams[0][3]) + OFFSET
movieId = mMovieParams[0][1]
playerReq = urllib2.Request(first_player_url)
playerUrl_raw = mMovieParams[0][0]
self.report_player_url(epTitle)
try:
playerResponse = urllib2.urlopen(playerReq)
urlHandle = urllib2.urlopen(playerUrl_raw)
playerUrl = urlHandle.geturl()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download player: %s' % unicode(err))
self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err))
return
player_url = playerResponse.geturl()
for actNum in range(ACT_COUNT):
mediaNum = startMediaNum + actNum
mediaId = movieId + str(mediaNum)
uri = mMovieParams[0][1]
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri})
self.report_index_download(epTitle)
try:
indexXml = urllib2.urlopen(indexUrl).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err))
return
idoc = xml.etree.ElementTree.fromstring(indexXml)
itemEls = idoc.findall('.//item')
for itemEl in itemEls:
mediaId = itemEl.findall('./guid')[0].text
shortMediaId = mediaId.split(':')[-1]
showId = mediaId.split(':')[-2].replace('.com', '')
officialTitle = itemEl.findall('./title')[0].text
officialDate = itemEl.findall('./pubDate')[0].text
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
urllib.urlencode({'uri': mediaId}))
configReq = urllib2.Request(configUrl)
@@ -3149,7 +3173,7 @@ class ComedyCentralIE(InfoExtractor):
turls.append(finfo)
if len(turls) == 0:
self._downloader.trouble(u'\nERROR: unable to download ' + str(mediaNum) + ': No videos found')
self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
continue
# For now, just pick the highest bitrate
@@ -3157,28 +3181,115 @@ class ComedyCentralIE(InfoExtractor):
self._downloader.increment_downloads()
effTitle = show_id.replace('.com', '') + '-' + epTitle
effTitle = showId + '-' + epTitle
info = {
'id': str(mediaNum),
'id': shortMediaId,
'url': video_url,
'uploader': show_id,
'upload_date': 'NA',
'uploader': showId,
'upload_date': officialDate,
'title': effTitle,
'stitle': self._simplify_title(effTitle),
'ext': 'mp4',
'format': format,
'thumbnail': None,
'description': 'TODO: Not yet supported',
'player_url': player_url
'description': officialTitle,
'player_url': playerUrl
}
try:
self._downloader.process_info(info)
except UnavailableVideoError, err:
self._downloader.trouble(u'\nERROR: unable to download ' + str(mediaNum))
self._downloader.trouble(u'\nERROR: unable to download ' + mediaId)
continue
class EscapistIE(InfoExtractor):
"""Information extractor for The Escapist """
_VALID_URL = r'^(https?://)?(www\.)escapistmagazine.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?].*$'
@staticmethod
def suitable(url):
return (re.match(EscapistIE._VALID_URL, url) is not None)
def report_extraction(self, showName):
self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName)
def report_config_download(self, showName):
self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName)
def _simplify_title(self, title):
res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
res = res.strip(ur'_')
return res
def _real_extract(self, url):
htmlParser = HTMLParser.HTMLParser()
mobj = re.match(self._VALID_URL, url)
if mobj is None:
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
return
showName = mobj.group('showname')
videoId = mobj.group('episode')
self.report_extraction(showName)
try:
webPage = urllib2.urlopen(url).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err))
return
descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
description = htmlParser.unescape(descMatch.group(1))
imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
imgUrl = htmlParser.unescape(imgMatch.group(1))
playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
playerUrl = htmlParser.unescape(playerUrlMatch.group(1))
configUrlMatch = re.search('config=(.*)$', playerUrl)
configUrl = urllib2.unquote(configUrlMatch.group(1))
self.report_config_download(showName)
try:
configJSON = urllib2.urlopen(configUrl).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download configuration: ' + unicode(err))
return
# Technically, it's JavaScript, not JSON
configJSON = configJSON.replace("'", '"')
try:
config = json.loads(configJSON)
except (ValueError,), err:
self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + unicode(err))
return
playlist = config['playlist']
videoUrl = playlist[1]['url']
self._downloader.increment_downloads()
info = {
'id': videoId,
'url': videoUrl,
'uploader': showName,
'upload_date': None,
'title': showName,
'stitle': self._simplify_title(showName),
'ext': 'flv',
'format': 'flv',
'thumbnail': imgUrl,
'description': description,
'player_url': playerUrl,
}
try:
self._downloader.process_info(info)
except UnavailableVideoError, err:
self._downloader.trouble(u'\nERROR: unable to download ' + videoId)
class PostProcessor(object):
"""Post Processor class.
@@ -3381,7 +3492,7 @@ def parseOpts():
kw = {
'version' : __version__,
'formatter' : fmt,
'usage' : '%prog [options] url...',
'usage' : '%prog [options] url [url...]',
'conflict_handler' : 'resolve',
}
@@ -3389,6 +3500,7 @@ def parseOpts():
# option groups
general = optparse.OptionGroup(parser, 'General Options')
selection = optparse.OptionGroup(parser, 'Video Selection')
authentication = optparse.OptionGroup(parser, 'Authentication Options')
video_format = optparse.OptionGroup(parser, 'Video Format Options')
postproc = optparse.OptionGroup(parser, 'Post-processing Options')
@@ -3407,14 +3519,17 @@ def parseOpts():
dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
general.add_option('-R', '--retries',
dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
general.add_option('--playlist-start',
dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
general.add_option('--playlist-end',
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
general.add_option('--dump-user-agent',
action='store_true', dest='dump_user_agent',
help='display the current browser identification', default=False)
selection.add_option('--playlist-start',
dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
selection.add_option('--playlist-end',
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
authentication.add_option('-u', '--username',
dest='username', metavar='USERNAME', help='account username')
authentication.add_option('-p', '--password',
@@ -3492,6 +3607,7 @@ def parseOpts():
parser.add_option_group(general)
parser.add_option_group(selection)
parser.add_option_group(filesystem)
parser.add_option_group(verbosity)
parser.add_option_group(video_format)
@@ -3581,24 +3697,30 @@ def main():
# Information extractors
youtube_ie = YoutubeIE()
metacafe_ie = MetacafeIE(youtube_ie)
dailymotion_ie = DailymotionIE()
youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
youtube_user_ie = YoutubeUserIE(youtube_ie)
youtube_search_ie = YoutubeSearchIE(youtube_ie)
google_ie = GoogleIE()
google_search_ie = GoogleSearchIE(google_ie)
photobucket_ie = PhotobucketIE()
yahoo_ie = YahooIE()
yahoo_search_ie = YahooSearchIE(yahoo_ie)
deposit_files_ie = DepositFilesIE()
facebook_ie = FacebookIE()
bliptv_ie = BlipTVIE()
vimeo_ie = VimeoIE()
myvideo_ie = MyVideoIE()
comedycentral_ie = ComedyCentralIE()
extractors = [ # Order does matter
youtube_ie,
MetacafeIE(youtube_ie),
DailymotionIE(),
YoutubePlaylistIE(youtube_ie),
YoutubeUserIE(youtube_ie),
YoutubeSearchIE(youtube_ie),
google_ie,
GoogleSearchIE(google_ie),
PhotobucketIE(),
yahoo_ie,
YahooSearchIE(yahoo_ie),
DepositFilesIE(),
FacebookIE(),
BlipTVIE(),
VimeoIE(),
MyVideoIE(),
ComedyCentralIE(),
EscapistIE(),
generic_ie = GenericIE()
GenericIE()
]
# File downloader
fd = FileDownloader({
@@ -3638,28 +3760,11 @@ def main():
'updatetime': opts.updatetime,
'writedescription': opts.writedescription,
'writeinfojson': opts.writeinfojson,
'matchtitle': opts.matchtitle,
'rejecttitle': opts.rejecttitle,
})
fd.add_info_extractor(youtube_search_ie)
fd.add_info_extractor(youtube_pl_ie)
fd.add_info_extractor(youtube_user_ie)
fd.add_info_extractor(metacafe_ie)
fd.add_info_extractor(dailymotion_ie)
fd.add_info_extractor(youtube_ie)
fd.add_info_extractor(google_ie)
fd.add_info_extractor(google_search_ie)
fd.add_info_extractor(photobucket_ie)
fd.add_info_extractor(yahoo_ie)
fd.add_info_extractor(yahoo_search_ie)
fd.add_info_extractor(deposit_files_ie)
fd.add_info_extractor(facebook_ie)
fd.add_info_extractor(bliptv_ie)
fd.add_info_extractor(vimeo_ie)
fd.add_info_extractor(myvideo_ie)
fd.add_info_extractor(comedycentral_ie)
# This must come last since it's the
# fallback if none of the others work
fd.add_info_extractor(generic_ie)
for extractor in extractors:
fd.add_info_extractor(extractor)
# PostProcessors
if opts.extractaudio: