Bump version number

Update README
Add --match-title and --reject-title (Closes #132 )
2026-01-24 00:00:10 -05:00 · 2011-09-14 22:55:26 +02:00 · 2011-09-14 22:55:09 +02:00 · 2011-09-14 22:54:51 +02:00 · 2011-09-14 22:26:53 +02:00 · 2011-09-14 21:39:41 +02:00
3 changed files with 187 additions and 76 deletions
--- a/2
+++ b/2
@@ -1 +1 @@
-2011.09.13
+2011.09.14
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # youtube-dl

 ## USAGE
-youtube-dl [OPTIONS] URL
+youtube-dl [options] url [url...]

 ## DESCRIPTION
 **youtube-dl** is a small command-line program to download videos from
@@ -17,9 +17,15 @@ which means you can modify it, redistribute it or use it however you like.
    -i, --ignore-errors      continue on download errors
    -r, --rate-limit LIMIT   download rate limit (e.g. 50k or 44.6m)
    -R, --retries RETRIES    number of retries (default is 10)
+    --dump-user-agent        display the current browser identification
+
+### Video Selection:
    --playlist-start NUMBER  playlist video to start at (default is 1)
    --playlist-end NUMBER    playlist video to end at (default is last)
-    --dump-user-agent        display the current browser identification
+    --match-title REGEX      download only matching titles (regex or caseless
+                             sub-string)
+    --reject-title REGEX     skip download for matching titles (regex or
+                             caseless sub-string)

 ### Filesystem Options:
    -t, --title              use title in file name
@@ -68,7 +74,7 @@ which means you can modify it, redistribute it or use it however you like.

 Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the -b option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you''re interested in. In that case, simply request it with the -f option and youtube-dl will try to download it.

-### I get HTTP error 402 when trying to download a video. What''s this?
+### I get HTTP error 402 when trying to download a video. What's this?

 Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We''re [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl.

--- a/249
+++ b/249
@@ -15,7 +15,7 @@ __author__  = (
 	)

 __license__ = 'Public Domain'
-__version__ = '2011.09.14'
+__version__ = '2011.09.15'

 UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'

@@ -23,6 +23,7 @@ import cookielib
 import datetime
 import gzip
 import htmlentitydefs
+import HTMLParser
 import httplib
 import locale
 import math
@@ -437,6 +438,8 @@ class FileDownloader(object):
 	noprogress:       Do not print the progress bar.
 	playliststart:    Playlist item to start at.
 	playlistend:      Playlist item to end at.
+	matchtitle:       Download only matching titles.
+	rejecttitle:      Reject downloads for matching titles.
 	logtostderr:      Log messages to stderr instead of stdout.
 	consoletitle:     Display progress in console window's titlebar.
 	nopart:           Do not use temporary .part files.
@@ -712,6 +715,17 @@ class FileDownloader(object):

 		if filename is None:
 			return
+
+		matchtitle=self.params.get('matchtitle',False)
+		rejecttitle=self.params.get('rejecttitle',False)
+		title=info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
+		if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
+			self.to_screen(u'[download] "%s" title did not match pattern "%s"' % (title, matchtitle))
+			return
+		if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
+			self.to_screen(u'[download] "%s" title matched reject pattern "%s"' % (title, rejecttitle))
+			return
+			
 		if self.params.get('nooverwrites', False) and os.path.exists(filename):
 			self.to_stderr(u'WARNING: file exists and will be skipped')
 			return
@@ -822,7 +836,7 @@ class FileDownloader(object):
 		# Download using rtmpdump. rtmpdump returns exit code 2 when
 		# the connection was interrumpted and resuming appears to be
 		# possible. This is part of rtmpdump's normal usage, AFAIK.
-		basic_args = ['rtmpdump'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
+		basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
 		retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
 		while retval == 2 or retval == 1:
 			prevsize = os.path.getsize(tmpfilename)
@@ -832,6 +846,11 @@ class FileDownloader(object):
 			cursize = os.path.getsize(tmpfilename)
 			if prevsize == cursize and retval == 1:
 				break
+			 # Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
+			if prevsize == cursize and retval == 2 and cursize > 1024:
+				self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
+				retval = 0
+				break
 		if retval == 0:
 			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
 			self.try_rename(tmpfilename, filename)
@@ -3055,6 +3074,9 @@ class ComedyCentralIE(InfoExtractor):
 	def report_config_download(self, episode_id):
 		self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)

+	def report_index_download(self, episode_id):
+		self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)
+
 	def report_player_url(self, episode_id):
 		self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)

@@ -3102,36 +3124,38 @@ class ComedyCentralIE(InfoExtractor):
 				return
 			epTitle = mobj.group('episode')

-		mMovieParams = re.findall('<param name="movie" value="(http://media.mtvnservices.com/(.*?:episode:([^:]*):)(.*?))"/>', html)
+		mMovieParams = re.findall('<param name="movie" value="(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"/>', html)
 		if len(mMovieParams) == 0:
 			self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
 			return
-		show_id = mMovieParams[0][2]
-		ACT_COUNT = { # TODO: Detect this dynamically
-			'thedailyshow.com': 4,
-			'colbertnation.com': 3,
-		}.get(show_id, 4)
-		OFFSET = {
-			'thedailyshow.com': 1,
-			'colbertnation.com': 1,
-		}.get(show_id, 1)

-		first_player_url = mMovieParams[0][0]
-		startMediaNum = int(mMovieParams[0][3]) + OFFSET
-		movieId = mMovieParams[0][1]
-
-		playerReq = urllib2.Request(first_player_url)
+		playerUrl_raw = mMovieParams[0][0]
 		self.report_player_url(epTitle)
 		try:
-			playerResponse = urllib2.urlopen(playerReq)
+			urlHandle = urllib2.urlopen(playerUrl_raw)
+			playerUrl = urlHandle.geturl()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable to download player: %s' % unicode(err))
+			self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err))
 			return
-		player_url = playerResponse.geturl()

-		for actNum in range(ACT_COUNT):
-			mediaNum = startMediaNum + actNum
-			mediaId = movieId + str(mediaNum)
+		uri = mMovieParams[0][1]
+		indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri})
+		self.report_index_download(epTitle)
+		try:
+			indexXml = urllib2.urlopen(indexUrl).read()
+		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+			self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err))
+			return
+
+		idoc = xml.etree.ElementTree.fromstring(indexXml)
+		itemEls = idoc.findall('.//item')
+		for itemEl in itemEls:
+			mediaId = itemEl.findall('./guid')[0].text
+			shortMediaId = mediaId.split(':')[-1]
+			showId = mediaId.split(':')[-2].replace('.com', '')
+			officialTitle = itemEl.findall('./title')[0].text
+			officialDate = itemEl.findall('./pubDate')[0].text
+
 			configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
 						urllib.urlencode({'uri': mediaId}))
 			configReq = urllib2.Request(configUrl)
@@ -3149,7 +3173,7 @@ class ComedyCentralIE(InfoExtractor):
 				turls.append(finfo)

 			if len(turls) == 0:
-				self._downloader.trouble(u'\nERROR: unable to download ' + str(mediaNum) + ': No videos found')
+				self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
 				continue

 			# For now, just pick the highest bitrate
@@ -3157,28 +3181,115 @@ class ComedyCentralIE(InfoExtractor):

 			self._downloader.increment_downloads()

-			effTitle = show_id.replace('.com', '') + '-' + epTitle
+			effTitle = showId + '-' + epTitle
 			info = {
-				'id': str(mediaNum),
+				'id': shortMediaId,
 				'url': video_url,
-				'uploader': show_id,
-				'upload_date': 'NA',
+				'uploader': showId,
+				'upload_date': officialDate,
 				'title': effTitle,
 				'stitle': self._simplify_title(effTitle),
 				'ext': 'mp4',
 				'format': format,
 				'thumbnail': None,
-				'description': 'TODO: Not yet supported',
-				'player_url': player_url
+				'description': officialTitle,
+				'player_url': playerUrl
 			}

 			try:
 				self._downloader.process_info(info)
 			except UnavailableVideoError, err:
-				self._downloader.trouble(u'\nERROR: unable to download ' + str(mediaNum))
+				self._downloader.trouble(u'\nERROR: unable to download ' + mediaId)
 				continue


+class EscapistIE(InfoExtractor):
+	"""Information extractor for The Escapist """
+
+	_VALID_URL = r'^(https?://)?(www\.)escapistmagazine.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?].*$'
+
+	@staticmethod
+	def suitable(url):
+		return (re.match(EscapistIE._VALID_URL, url) is not None)
+
+	def report_extraction(self, showName):
+		self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName)
+
+	def report_config_download(self, showName):
+		self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName)
+
+	def _simplify_title(self, title):
+		res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
+		res = res.strip(ur'_')
+		return res
+
+	def _real_extract(self, url):
+		htmlParser = HTMLParser.HTMLParser()
+
+		mobj = re.match(self._VALID_URL, url)
+		if mobj is None:
+			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+			return
+		showName = mobj.group('showname')
+		videoId = mobj.group('episode')
+
+		self.report_extraction(showName)
+		try:
+			webPage = urllib2.urlopen(url).read()
+		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+			self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err))
+			return
+
+		descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
+		description = htmlParser.unescape(descMatch.group(1))
+		imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
+		imgUrl = htmlParser.unescape(imgMatch.group(1))
+		playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
+		playerUrl = htmlParser.unescape(playerUrlMatch.group(1))
+		configUrlMatch = re.search('config=(.*)$', playerUrl)
+		configUrl = urllib2.unquote(configUrlMatch.group(1))
+
+		self.report_config_download(showName)
+		try:
+			configJSON = urllib2.urlopen(configUrl).read()
+		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+			self._downloader.trouble(u'ERROR: unable to download configuration: ' + unicode(err))
+			return
+
+		# Technically, it's JavaScript, not JSON
+		configJSON = configJSON.replace("'", '"')
+
+		try:
+			config = json.loads(configJSON)
+		except (ValueError,), err:
+			self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + unicode(err))
+			return
+
+		playlist = config['playlist']
+		videoUrl = playlist[1]['url']
+
+		self._downloader.increment_downloads()
+		info = {
+			'id': videoId,
+			'url': videoUrl,
+			'uploader': showName,
+			'upload_date': None,
+			'title': showName,
+			'stitle': self._simplify_title(showName),
+			'ext': 'flv',
+			'format': 'flv',
+			'thumbnail': imgUrl,
+			'description': description,
+			'player_url': playerUrl,
+		}
+
+		try:
+			self._downloader.process_info(info)
+		except UnavailableVideoError, err:
+			self._downloader.trouble(u'\nERROR: unable to download ' + videoId)
+
+
+
 class PostProcessor(object):
 	"""Post Processor class.

@@ -3381,7 +3492,7 @@ def parseOpts():
 	kw = {
 		'version'   : __version__,
 		'formatter' : fmt,
-		'usage' : '%prog [options] url...',
+		'usage' : '%prog [options] url [url...]',
 		'conflict_handler' : 'resolve',
 	}

@@ -3389,6 +3500,7 @@ def parseOpts():

 	# option groups
 	general        = optparse.OptionGroup(parser, 'General Options')
+	selection      = optparse.OptionGroup(parser, 'Video Selection')
 	authentication = optparse.OptionGroup(parser, 'Authentication Options')
 	video_format   = optparse.OptionGroup(parser, 'Video Format Options')
 	postproc       = optparse.OptionGroup(parser, 'Post-processing Options')
@@ -3407,14 +3519,17 @@ def parseOpts():
 			dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
 	general.add_option('-R', '--retries',
 			dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
-	general.add_option('--playlist-start',
-			dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
-	general.add_option('--playlist-end',
-			dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
 	general.add_option('--dump-user-agent',
 			action='store_true', dest='dump_user_agent',
 			help='display the current browser identification', default=False)

+	selection.add_option('--playlist-start',
+			dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
+	selection.add_option('--playlist-end',
+			dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
+	selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
+	selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
+
 	authentication.add_option('-u', '--username',
 			dest='username', metavar='USERNAME', help='account username')
 	authentication.add_option('-p', '--password',
@@ -3492,6 +3607,7 @@ def parseOpts():


 	parser.add_option_group(general)
+	parser.add_option_group(selection)
 	parser.add_option_group(filesystem)
 	parser.add_option_group(verbosity)
 	parser.add_option_group(video_format)
@@ -3581,24 +3697,30 @@ def main():

 	# Information extractors
 	youtube_ie = YoutubeIE()
-	metacafe_ie = MetacafeIE(youtube_ie)
-	dailymotion_ie = DailymotionIE()
-	youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
-	youtube_user_ie = YoutubeUserIE(youtube_ie)
-	youtube_search_ie = YoutubeSearchIE(youtube_ie)
 	google_ie = GoogleIE()
-	google_search_ie = GoogleSearchIE(google_ie)
-	photobucket_ie = PhotobucketIE()
 	yahoo_ie = YahooIE()
-	yahoo_search_ie = YahooSearchIE(yahoo_ie)
-	deposit_files_ie = DepositFilesIE()
-	facebook_ie = FacebookIE()
-	bliptv_ie = BlipTVIE()
-	vimeo_ie = VimeoIE()
-	myvideo_ie = MyVideoIE()
-	comedycentral_ie = ComedyCentralIE()
+	extractors = [ # Order does matter
+		youtube_ie,
+		MetacafeIE(youtube_ie),
+		DailymotionIE(),
+		YoutubePlaylistIE(youtube_ie),
+		YoutubeUserIE(youtube_ie),
+		YoutubeSearchIE(youtube_ie),
+		google_ie,
+		GoogleSearchIE(google_ie),
+		PhotobucketIE(),
+		yahoo_ie,
+		YahooSearchIE(yahoo_ie),
+		DepositFilesIE(),
+		FacebookIE(),
+		BlipTVIE(),
+		VimeoIE(),
+		MyVideoIE(),
+		ComedyCentralIE(),
+		EscapistIE(),

-	generic_ie = GenericIE()
+		GenericIE()
+	]

 	# File downloader
 	fd = FileDownloader({
@@ -3638,28 +3760,11 @@ def main():
 		'updatetime': opts.updatetime,
 		'writedescription': opts.writedescription,
 		'writeinfojson': opts.writeinfojson,
+		'matchtitle': opts.matchtitle,
+		'rejecttitle': opts.rejecttitle,
 		})
-	fd.add_info_extractor(youtube_search_ie)
-	fd.add_info_extractor(youtube_pl_ie)
-	fd.add_info_extractor(youtube_user_ie)
-	fd.add_info_extractor(metacafe_ie)
-	fd.add_info_extractor(dailymotion_ie)
-	fd.add_info_extractor(youtube_ie)
-	fd.add_info_extractor(google_ie)
-	fd.add_info_extractor(google_search_ie)
-	fd.add_info_extractor(photobucket_ie)
-	fd.add_info_extractor(yahoo_ie)
-	fd.add_info_extractor(yahoo_search_ie)
-	fd.add_info_extractor(deposit_files_ie)
-	fd.add_info_extractor(facebook_ie)
-	fd.add_info_extractor(bliptv_ie)
-	fd.add_info_extractor(vimeo_ie)
-	fd.add_info_extractor(myvideo_ie)
-	fd.add_info_extractor(comedycentral_ie)
-
-	# This must come last since it's the
-	# fallback if none of the others work
-	fd.add_info_extractor(generic_ie)
+	for extractor in extractors:
+		fd.add_info_extractor(extractor)

 	# PostProcessors
 	if opts.extractaudio:
Author	SHA1	Message	Date
Philipp Hagemeister	e5b9fac281	Bump version number	2011-09-14 22:55:26 +02:00
Philipp Hagemeister	08c1d0d3bc	Update README	2011-09-14 22:55:09 +02:00
Anand Babu Periasamy	20e91e8375	Add --match-title and --reject-title (Closes #132 )	2011-09-14 22:54:51 +02:00
Philipp Hagemeister	f9c6878714	Support for The Escapist	2011-09-14 22:26:53 +02:00
Philipp Hagemeister	8c5dc3ad40	Simplify IE index	2011-09-14 21:39:41 +02:00
Philipp Hagemeister	1d2e86aed9	Decapitalize options in README for consistency with youtube-dl --help	2011-09-14 21:20:23 +02:00
Philipp Hagemeister	a2f7e3a5bb	Clarify usage	2011-09-14 21:19:33 +02:00
Philipp Hagemeister	f2a3a3522c	typo in README	2011-09-14 21:18:22 +02:00
Philipp Hagemeister	b487ef0833	Fully implement comedycentral downloader	2011-09-14 21:17:05 +02:00
Philipp Hagemeister	d0922f29a3	Update LATEST_VERSION (oops)	2011-09-14 00:04:46 +02:00
@@ -1 +1 @@
 .09.13
 .09.14