mirror of
https://gitlab.com/ytdl-org/youtube-dl.git
synced 2026-01-25 00:00:04 -05:00
Compare commits
119 Commits
2016.03.26
...
2016.04.06
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c41cf65d4a | ||
|
|
ec4a4c6fcc | ||
|
|
be0c7009fb | ||
|
|
92d5477d84 | ||
|
|
8790249c68 | ||
|
|
416930d450 | ||
|
|
65150b41bb | ||
|
|
e42f413716 | ||
|
|
40a056d85d | ||
|
|
e7d77efb9d | ||
|
|
995cf05c96 | ||
|
|
5bf28d7864 | ||
|
|
8c7d6e8e22 | ||
|
|
6d4fc66bfc | ||
|
|
23576edbfc | ||
|
|
4d4cd35f48 | ||
|
|
3aac9b2fb1 | ||
|
|
e47d19e991 | ||
|
|
41f5492fbc | ||
|
|
2defa7d75a | ||
|
|
bbc26c8a01 | ||
|
|
b507cc925b | ||
|
|
db8ee7ec05 | ||
|
|
08136dc138 | ||
|
|
fe7ef95e91 | ||
|
|
5f705baf5e | ||
|
|
0750b2491f | ||
|
|
df634be2ed | ||
|
|
6d628fafca | ||
|
|
0f28777f58 | ||
|
|
329c1eae54 | ||
|
|
9aaaf8e8e8 | ||
|
|
04819db58e | ||
|
|
79ba9140dc | ||
|
|
75d572e9fb | ||
|
|
791d6aaecc | ||
|
|
81de73e5b4 | ||
|
|
83cedc1cf2 | ||
|
|
244cd04237 | ||
|
|
fbdaced256 | ||
|
|
a3373823e1 | ||
|
|
03caa463e7 | ||
|
|
3f64379eda | ||
|
|
3e0c3d14d9 | ||
|
|
d8873d4def | ||
|
|
db1c969da5 | ||
|
|
1e02bc7ba2 | ||
|
|
63c55e9f22 | ||
|
|
f9b1529af8 | ||
|
|
961fc024d2 | ||
|
|
b53a06e3b9 | ||
|
|
4ecc1fc638 | ||
|
|
5b012dfce8 | ||
|
|
8369942773 | ||
|
|
86f3b66cec | ||
|
|
6bb4600717 | ||
|
|
41d06b0424 | ||
|
|
15d260ebaa | ||
|
|
ed0291d153 | ||
|
|
81da8cbc45 | ||
|
|
5299bc3f91 | ||
|
|
c9c39c22c5 | ||
|
|
d84b48e3f1 | ||
|
|
dd17041c82 | ||
|
|
fea7295b14 | ||
|
|
9cf01f7f30 | ||
|
|
ce548296fe | ||
|
|
c02ec7d430 | ||
|
|
6b820a2376 | ||
|
|
e621a344e6 | ||
|
|
3ae6f8fec1 | ||
|
|
597d52fadb | ||
|
|
afca767d19 | ||
|
|
6e359a1534 | ||
|
|
607619bc90 | ||
|
|
0b7bfc9422 | ||
|
|
7168a6c874 | ||
|
|
034947dd1e | ||
|
|
3c0de33ad7 | ||
|
|
89924f8230 | ||
|
|
a39c68f7e5 | ||
|
|
4a5a67ca25 | ||
|
|
8751da85a7 | ||
|
|
3bf1df51fd | ||
|
|
3842a3e652 | ||
|
|
7710bdf4e8 | ||
|
|
8d9dd3c34b | ||
|
|
33f3040a3e | ||
|
|
03442072c0 | ||
|
|
c8b13fec02 | ||
|
|
87d105ac6c | ||
|
|
3454139576 | ||
|
|
3a23bae9cc | ||
|
|
8f9a477e7f | ||
|
|
a1cf3e38a3 | ||
|
|
a122e7080b | ||
|
|
b22ca76204 | ||
|
|
f7df343b4a | ||
|
|
19dbaeece3 | ||
|
|
395fd4b08a | ||
|
|
8018028d0f | ||
|
|
00322ad4fd | ||
|
|
4cf3489c6e | ||
|
|
b24ab3e341 | ||
|
|
af4116f4f0 | ||
|
|
f973e5d54e | ||
|
|
62f55aa68a | ||
|
|
02d7634d24 | ||
|
|
48dce58ca9 | ||
|
|
efcba804f6 | ||
|
|
6dee688e6d | ||
|
|
eedb7ba536 | ||
|
|
dcf77cf1a7 | ||
|
|
17bcc626bf | ||
|
|
b5a5bbf376 | ||
|
|
e68d3a010f | ||
|
|
d10fe8358c | ||
|
|
d6c340cae5 | ||
|
|
5964b598ff |
58
.github/ISSUE_TEMPLATE.md
vendored
Normal file
58
.github/ISSUE_TEMPLATE.md
vendored
Normal file
@@ -0,0 +1,58 @@
|
||||
## Please follow the guide below
|
||||
|
||||
- You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly
|
||||
- Put an `x` into all the boxes [ ] relevant to your *issue* (like that [x])
|
||||
- Use *Preview* tab to see how your issue will actually look like
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.06*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.06**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
|
||||
|
||||
### What is the purpose of your *issue*?
|
||||
- [ ] Bug report (encountered problems with youtube-dl)
|
||||
- [ ] Site support request (request for adding support for a new site)
|
||||
- [ ] Feature request (request for a new functionality)
|
||||
- [ ] Question
|
||||
- [ ] Other
|
||||
|
||||
---
|
||||
|
||||
### The following sections concretize particular purposed issues, you can erase any section (the contents between triple ---) not applicable to your *issue*
|
||||
|
||||
---
|
||||
|
||||
### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows:
|
||||
|
||||
Add `-v` flag to **your command line** you run youtube-dl with, copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```):
|
||||
```
|
||||
$ youtube-dl -v <your command line>
|
||||
[debug] System config: []
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.04.06
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
...
|
||||
<end of log>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### If the purpose of this *issue* is a *site support request* please provide all kinds of example URLs support for which should be included (replace following example URLs by **yours**):
|
||||
- Single video: https://www.youtube.com/watch?v=BaW_jenozKc
|
||||
- Single video: https://youtu.be/BaW_jenozKc
|
||||
- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc
|
||||
|
||||
---
|
||||
|
||||
### Description of your *issue*, suggested solution and other information
|
||||
|
||||
Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible.
|
||||
If work on your *issue* required an account credentials please provide them or explain how one can obtain them.
|
||||
58
.github/ISSUE_TEMPLATE_tmpl.md
vendored
Normal file
58
.github/ISSUE_TEMPLATE_tmpl.md
vendored
Normal file
@@ -0,0 +1,58 @@
|
||||
## Please follow the guide below
|
||||
|
||||
- You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly
|
||||
- Put an `x` into all the boxes [ ] relevant to your *issue* (like that [x])
|
||||
- Use *Preview* tab to see how your issue will actually look like
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **%(version)s**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
|
||||
|
||||
### What is the purpose of your *issue*?
|
||||
- [ ] Bug report (encountered problems with youtube-dl)
|
||||
- [ ] Site support request (request for adding support for a new site)
|
||||
- [ ] Feature request (request for a new functionality)
|
||||
- [ ] Question
|
||||
- [ ] Other
|
||||
|
||||
---
|
||||
|
||||
### The following sections concretize particular purposed issues, you can erase any section (the contents between triple ---) not applicable to your *issue*
|
||||
|
||||
---
|
||||
|
||||
### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows:
|
||||
|
||||
Add `-v` flag to **your command line** you run youtube-dl with, copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```):
|
||||
```
|
||||
$ youtube-dl -v <your command line>
|
||||
[debug] System config: []
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version %(version)s
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
...
|
||||
<end of log>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### If the purpose of this *issue* is a *site support request* please provide all kinds of example URLs support for which should be included (replace following example URLs by **yours**):
|
||||
- Single video: https://www.youtube.com/watch?v=BaW_jenozKc
|
||||
- Single video: https://youtu.be/BaW_jenozKc
|
||||
- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc
|
||||
|
||||
---
|
||||
|
||||
### Description of your *issue*, suggested solution and other information
|
||||
|
||||
Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible.
|
||||
If work on your *issue* required an account credentials please provide them or explain how one can obtain them.
|
||||
5
Makefile
5
Makefile
@@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
find . -name "*.pyc" -delete
|
||||
find . -name "*.class" -delete
|
||||
|
||||
@@ -59,6 +59,9 @@ README.md: youtube_dl/*.py youtube_dl/*/*.py
|
||||
CONTRIBUTING.md: README.md
|
||||
$(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md
|
||||
|
||||
.github/ISSUE_TEMPLATE.md: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl.md youtube_dl/version.py
|
||||
$(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl.md .github/ISSUE_TEMPLATE.md
|
||||
|
||||
supportedsites:
|
||||
$(PYTHON) devscripts/make_supportedsites.py docs/supportedsites.md
|
||||
|
||||
|
||||
@@ -600,6 +600,7 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
|
||||
- `vcodec`: Name of the video codec in use
|
||||
- `container`: Name of the container format
|
||||
- `protocol`: The protocol that will be used for the actual download, lower-case. `http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `m3u8`, or `m3u8_native`
|
||||
- `format_id`: A short description of the format
|
||||
|
||||
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by video hoster.
|
||||
|
||||
|
||||
29
devscripts/make_issue_template.py
Normal file
29
devscripts/make_issue_template.py
Normal file
@@ -0,0 +1,29 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import io
|
||||
import optparse
|
||||
|
||||
|
||||
def main():
|
||||
parser = optparse.OptionParser(usage='%prog INFILE OUTFILE')
|
||||
options, args = parser.parse_args()
|
||||
if len(args) != 2:
|
||||
parser.error('Expected an input and an output filename')
|
||||
|
||||
infile, outfile = args
|
||||
|
||||
with io.open(infile, encoding='utf-8') as inf:
|
||||
issue_template_tmpl = inf.read()
|
||||
|
||||
# Get the version from youtube_dl/version.py without importing the package
|
||||
exec(compile(open('youtube_dl/version.py').read(),
|
||||
'youtube_dl/version.py', 'exec'))
|
||||
|
||||
out = issue_template_tmpl % {'version': locals()['__version__']}
|
||||
|
||||
with io.open(outfile, 'w', encoding='utf-8') as outf:
|
||||
outf.write(out)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -45,9 +45,9 @@ fi
|
||||
/bin/echo -e "\n### Changing version in version.py..."
|
||||
sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
|
||||
|
||||
/bin/echo -e "\n### Committing documentation and youtube_dl/version.py..."
|
||||
make README.md CONTRIBUTING.md supportedsites
|
||||
git add README.md CONTRIBUTING.md docs/supportedsites.md youtube_dl/version.py
|
||||
/bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..."
|
||||
make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites
|
||||
git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py
|
||||
git commit -m "release $version"
|
||||
|
||||
/bin/echo -e "\n### Now tagging, signing and pushing..."
|
||||
|
||||
@@ -57,6 +57,7 @@
|
||||
- **AudioBoom**
|
||||
- **audiomack**
|
||||
- **audiomack:album**
|
||||
- **auroravid**: AuroraVid
|
||||
- **Azubu**
|
||||
- **AzubuLive**
|
||||
- **BaiduVideo**: 百度视频
|
||||
@@ -92,12 +93,14 @@
|
||||
- **BYUtv**
|
||||
- **Camdemy**
|
||||
- **CamdemyFolder**
|
||||
- **CamWithHer**
|
||||
- **canalc2.tv**
|
||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||
- **Canvas**
|
||||
- **CBC**
|
||||
- **CBCPlayer**
|
||||
- **CBS**
|
||||
- **CBSInteractive**
|
||||
- **CBSNews**: CBS News
|
||||
- **CBSNewsLiveVideo**: CBS News Live Videos
|
||||
- **CBSSports**
|
||||
@@ -118,7 +121,7 @@
|
||||
- **Clubic**
|
||||
- **Clyp**
|
||||
- **cmt.com**
|
||||
- **CNET**
|
||||
- **CNBC**
|
||||
- **CNN**
|
||||
- **CNNArticle**
|
||||
- **CNNBlogs**
|
||||
@@ -134,6 +137,7 @@
|
||||
- **CrooksAndLiars**
|
||||
- **Crunchyroll**
|
||||
- **crunchyroll:playlist**
|
||||
- **CSNNE**
|
||||
- **CSpan**: C-SPAN
|
||||
- **CtsNews**: 華視新聞
|
||||
- **culturebox.francetvinfo.fr**
|
||||
@@ -376,7 +380,8 @@
|
||||
- **myvideo** (Currently broken)
|
||||
- **MyVidster**
|
||||
- **n-tv.de**
|
||||
- **NationalGeographic**
|
||||
- **natgeo**
|
||||
- **natgeo:channel**
|
||||
- **Naver**
|
||||
- **NBA**
|
||||
- **NBC**
|
||||
@@ -416,7 +421,6 @@
|
||||
- **Normalboots**
|
||||
- **NosVideo**
|
||||
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
|
||||
- **novamov**: NovaMov
|
||||
- **nowness**
|
||||
- **nowness:playlist**
|
||||
- **nowness:series**
|
||||
@@ -618,7 +622,6 @@
|
||||
- **Telegraaf**
|
||||
- **TeleMB**
|
||||
- **TeleTask**
|
||||
- **TenPlay**
|
||||
- **TF1**
|
||||
- **TheIntercept**
|
||||
- **TheOnion**
|
||||
@@ -740,6 +743,7 @@
|
||||
- **vlive**
|
||||
- **Vodlocker**
|
||||
- **VoiceRepublic**
|
||||
- **VoxMedia**
|
||||
- **Vporn**
|
||||
- **vpro**: npo.nl and ntr.nl
|
||||
- **VRT**
|
||||
|
||||
@@ -2,5 +2,5 @@
|
||||
universal = True
|
||||
|
||||
[flake8]
|
||||
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build,.git
|
||||
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/make_issue_template.py,setup.py,build,.git
|
||||
ignore = E402,E501,E731
|
||||
|
||||
@@ -76,6 +76,10 @@ class TestCompat(unittest.TestCase):
|
||||
self.assertEqual(compat_urllib_parse_urlencode({'abc': b'def'}), 'abc=def')
|
||||
self.assertEqual(compat_urllib_parse_urlencode({b'abc': 'def'}), 'abc=def')
|
||||
self.assertEqual(compat_urllib_parse_urlencode({b'abc': b'def'}), 'abc=def')
|
||||
self.assertEqual(compat_urllib_parse_urlencode([('abc', 'def')]), 'abc=def')
|
||||
self.assertEqual(compat_urllib_parse_urlencode([('abc', b'def')]), 'abc=def')
|
||||
self.assertEqual(compat_urllib_parse_urlencode([(b'abc', 'def')]), 'abc=def')
|
||||
self.assertEqual(compat_urllib_parse_urlencode([(b'abc', b'def')]), 'abc=def')
|
||||
|
||||
def test_compat_shlex_split(self):
|
||||
self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
|
||||
|
||||
@@ -39,6 +39,8 @@ from .compat import (
|
||||
compat_urllib_request_DataHandler,
|
||||
)
|
||||
from .utils import (
|
||||
age_restricted,
|
||||
args_to_str,
|
||||
ContentTooShortError,
|
||||
date_from_str,
|
||||
DateRange,
|
||||
@@ -58,13 +60,16 @@ from .utils import (
|
||||
PagedList,
|
||||
parse_filesize,
|
||||
PerRequestProxyHandler,
|
||||
PostProcessingError,
|
||||
platform_name,
|
||||
PostProcessingError,
|
||||
preferredencoding,
|
||||
prepend_extension,
|
||||
render_table,
|
||||
replace_extension,
|
||||
SameFileError,
|
||||
sanitize_filename,
|
||||
sanitize_path,
|
||||
sanitize_url,
|
||||
sanitized_Request,
|
||||
std_headers,
|
||||
subtitles_filename,
|
||||
@@ -75,10 +80,6 @@ from .utils import (
|
||||
write_string,
|
||||
YoutubeDLCookieProcessor,
|
||||
YoutubeDLHandler,
|
||||
prepend_extension,
|
||||
replace_extension,
|
||||
args_to_str,
|
||||
age_restricted,
|
||||
)
|
||||
from .cache import Cache
|
||||
from .extractor import get_info_extractor, gen_extractors
|
||||
@@ -1229,6 +1230,7 @@ class YoutubeDL(object):
|
||||
t.get('preference'), t.get('width'), t.get('height'),
|
||||
t.get('id'), t.get('url')))
|
||||
for i, t in enumerate(thumbnails):
|
||||
t['url'] = sanitize_url(t['url'])
|
||||
if t.get('width') and t.get('height'):
|
||||
t['resolution'] = '%dx%d' % (t['width'], t['height'])
|
||||
if t.get('id') is None:
|
||||
@@ -1263,6 +1265,8 @@ class YoutubeDL(object):
|
||||
if subtitles:
|
||||
for _, subtitle in subtitles.items():
|
||||
for subtitle_format in subtitle:
|
||||
if subtitle_format.get('url'):
|
||||
subtitle_format['url'] = sanitize_url(subtitle_format['url'])
|
||||
if 'ext' not in subtitle_format:
|
||||
subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
|
||||
|
||||
@@ -1292,6 +1296,8 @@ class YoutubeDL(object):
|
||||
if 'url' not in format:
|
||||
raise ExtractorError('Missing "url" key in result (index %d)' % i)
|
||||
|
||||
format['url'] = sanitize_url(format['url'])
|
||||
|
||||
if format.get('format_id') is None:
|
||||
format['format_id'] = compat_str(i)
|
||||
else:
|
||||
|
||||
@@ -181,7 +181,8 @@ except ImportError: # Python 2
|
||||
if isinstance(e, dict):
|
||||
e = encode_dict(e)
|
||||
elif isinstance(e, (list, tuple,)):
|
||||
e = encode_list(e)
|
||||
list_e = encode_list(e)
|
||||
e = tuple(list_e) if isinstance(e, tuple) else list_e
|
||||
elif isinstance(e, compat_str):
|
||||
e = e.encode(encoding)
|
||||
return e
|
||||
|
||||
@@ -223,6 +223,12 @@ def write_metadata_tag(stream, metadata):
|
||||
write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata))
|
||||
|
||||
|
||||
def remove_encrypted_media(media):
|
||||
return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
|
||||
'drmAdditionalHeaderSetId' not in e.attrib,
|
||||
media))
|
||||
|
||||
|
||||
def _add_ns(prop):
|
||||
return '{http://ns.adobe.com/f4m/1.0}%s' % prop
|
||||
|
||||
@@ -244,9 +250,7 @@ class F4mFD(FragmentFD):
|
||||
# without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
|
||||
if 'id' not in e.attrib:
|
||||
self.report_error('Missing ID in f4m DRM')
|
||||
media = list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
|
||||
'drmAdditionalHeaderSetId' not in e.attrib,
|
||||
media))
|
||||
media = remove_encrypted_media(media)
|
||||
if not media:
|
||||
self.report_error('Unsupported DRM')
|
||||
return media
|
||||
|
||||
@@ -95,6 +95,7 @@ from .camdemy import (
|
||||
CamdemyIE,
|
||||
CamdemyFolderIE
|
||||
)
|
||||
from .camwithher import CamWithHerIE
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .canvas import CanvasIE
|
||||
@@ -103,6 +104,7 @@ from .cbc import (
|
||||
CBCPlayerIE,
|
||||
)
|
||||
from .cbs import CBSIE
|
||||
from .cbsinteractive import CBSInteractiveIE
|
||||
from .cbsnews import (
|
||||
CBSNewsIE,
|
||||
CBSNewsLiveVideoIE,
|
||||
@@ -127,7 +129,7 @@ from .cloudy import CloudyIE
|
||||
from .clubic import ClubicIE
|
||||
from .clyp import ClypIE
|
||||
from .cmt import CMTIE
|
||||
from .cnet import CNETIE
|
||||
from .cnbc import CNBCIE
|
||||
from .cnn import (
|
||||
CNNIE,
|
||||
CNNBlogsIE,
|
||||
@@ -437,10 +439,14 @@ from .myspass import MySpassIE
|
||||
from .myvi import MyviIE
|
||||
from .myvideo import MyVideoIE
|
||||
from .myvidster import MyVidsterIE
|
||||
from .nationalgeographic import NationalGeographicIE
|
||||
from .nationalgeographic import (
|
||||
NationalGeographicIE,
|
||||
NationalGeographicChannelIE,
|
||||
)
|
||||
from .naver import NaverIE
|
||||
from .nba import NBAIE
|
||||
from .nbc import (
|
||||
CSNNEIE,
|
||||
NBCIE,
|
||||
NBCNewsIE,
|
||||
NBCSportsIE,
|
||||
@@ -490,11 +496,11 @@ from .normalboots import NormalbootsIE
|
||||
from .nosvideo import NosVideoIE
|
||||
from .nova import NovaIE
|
||||
from .novamov import (
|
||||
NovaMovIE,
|
||||
WholeCloudIE,
|
||||
AuroraVidIE,
|
||||
CloudTimeIE,
|
||||
NowVideoIE,
|
||||
VideoWeedIE,
|
||||
CloudTimeIE,
|
||||
WholeCloudIE,
|
||||
)
|
||||
from .nowness import (
|
||||
NownessIE,
|
||||
@@ -735,7 +741,6 @@ from .telecinco import TelecincoIE
|
||||
from .telegraaf import TelegraafIE
|
||||
from .telemb import TeleMBIE
|
||||
from .teletask import TeleTaskIE
|
||||
from .tenplay import TenPlayIE
|
||||
from .testurl import TestURLIE
|
||||
from .tf1 import TF1IE
|
||||
from .theintercept import TheInterceptIE
|
||||
@@ -900,6 +905,7 @@ from .vk import (
|
||||
from .vlive import VLiveIE
|
||||
from .vodlocker import VodlockerIE
|
||||
from .voicerepublic import VoiceRepublicIE
|
||||
from .voxmedia import VoxMediaIE
|
||||
from .vporn import VpornIE
|
||||
from .vrt import VRTIE
|
||||
from .vube import VubeIE
|
||||
|
||||
@@ -44,6 +44,7 @@ class Abc7NewsIE(InfoExtractor):
|
||||
'contentURL', webpage, 'm3u8 url', fatal=True)
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._og_search_title(webpage).strip()
|
||||
description = self._og_search_description(webpage).strip()
|
||||
|
||||
@@ -1,13 +1,19 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class AENetworksIE(InfoExtractor):
|
||||
IE_NAME = 'aenetworks'
|
||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?P<type>[^/]+)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
|
||||
@@ -16,6 +22,9 @@ class AENetworksIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': "Bet You Didn't Know: Valentine's Day",
|
||||
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
|
||||
'timestamp': 1375819729,
|
||||
'upload_date': '20130806',
|
||||
'uploader': 'AENE-NEW',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -25,15 +34,15 @@ class AENetworksIE(InfoExtractor):
|
||||
'expected_warnings': ['JSON-LD'],
|
||||
}, {
|
||||
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||
'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
|
||||
'info_dict': {
|
||||
'id': 'eg47EERs_JsZ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Winter Is Coming',
|
||||
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'timestamp': 1338306241,
|
||||
'upload_date': '20120529',
|
||||
'uploader': 'AENE-NEW',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}, {
|
||||
@@ -48,7 +57,7 @@ class AENetworksIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
page_type, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
@@ -56,11 +65,23 @@ class AENetworksIE(InfoExtractor):
|
||||
r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
|
||||
r"media_url\s*=\s*'([^']+)'"
|
||||
]
|
||||
video_url = self._search_regex(video_url_re, webpage, 'video url')
|
||||
video_url = unescapeHTML(self._search_regex(video_url_re, webpage, 'video url'))
|
||||
query = {'mbr': 'true'}
|
||||
if page_type == 'shows':
|
||||
query['assetTypes'] = 'medium_video_s3'
|
||||
if 'switch=hds' in video_url:
|
||||
query['switch'] = 'hls'
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, fatal=False)
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}),
|
||||
'url': smuggle_url(
|
||||
update_url_query(video_url, query),
|
||||
{
|
||||
'sig': {
|
||||
'key': 'crazyjava',
|
||||
'secret': 's3cr3t'},
|
||||
'force_smil_url': True
|
||||
}),
|
||||
})
|
||||
return info
|
||||
|
||||
@@ -69,12 +69,14 @@ class AMPIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': get_media_node('title'),
|
||||
'description': get_media_node('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': parse_iso8601(item.get('pubDate'), ' '),
|
||||
'timestamp': timestamp,
|
||||
'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
|
||||
@@ -120,6 +120,7 @@ class AzubuLiveIE(InfoExtractor):
|
||||
bc_info = self._download_json(req, user)
|
||||
m3u8_url = next(source['src'] for source in bc_info['sources'] if source['container'] == 'M2TS')
|
||||
formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': info['id'],
|
||||
|
||||
@@ -328,6 +328,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'format_id': '%s_%s' % (service, format['format_id']),
|
||||
'abr': abr,
|
||||
'acodec': acodec,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
formats.extend(conn_formats)
|
||||
return formats
|
||||
@@ -688,6 +689,10 @@ class BBCIE(BBCCoUkIE):
|
||||
# custom redirection to www.bbc.com
|
||||
'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# single video article embedded with data-media-vpid
|
||||
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -817,7 +822,7 @@ class BBCIE(BBCCoUkIE):
|
||||
|
||||
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||
programme_id = self._search_regex(
|
||||
[r'data-video-player-vpid="(%s)"' % self._ID_REGEX,
|
||||
[r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
|
||||
r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX,
|
||||
r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
|
||||
webpage, 'vpid', default=None)
|
||||
|
||||
@@ -34,7 +34,7 @@ class BeegIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._download_json(
|
||||
'https://api.beeg.com/api/v5/video/%s' % video_id, video_id)
|
||||
'https://api.beeg.com/api/v6/1738/video/%s' % video_id, video_id)
|
||||
|
||||
def split(o, e):
|
||||
def cut(s, x):
|
||||
@@ -50,8 +50,8 @@ class BeegIE(InfoExtractor):
|
||||
return n
|
||||
|
||||
def decrypt_key(key):
|
||||
# Reverse engineered from http://static.beeg.com/cpl/1105.js
|
||||
a = '5ShMcIQlssOd7zChAIOlmeTZDaUxULbJRnywYaiB'
|
||||
# Reverse engineered from http://static.beeg.com/cpl/1738.js
|
||||
a = 'GUuyodcfS8FW8gQp4OKLMsZBcX0T7B'
|
||||
e = compat_urllib_parse_unquote(key)
|
||||
o = ''.join([
|
||||
compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21)
|
||||
|
||||
@@ -94,6 +94,7 @@ class BetIE(InfoExtractor):
|
||||
xpath_with_ns('./media:thumbnail', NS_MAP)).get('url')
|
||||
|
||||
formats = self._extract_smil_formats(smil_url, display_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -15,6 +15,9 @@ class BravoTVIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Last Chance Kitchen Returns',
|
||||
'description': 'S13: Last Chance Kitchen Returns for Top Chef Season 13',
|
||||
'timestamp': 1448926740,
|
||||
'upload_date': '20151130',
|
||||
'uploader': 'NBCU-BRAV',
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -46,6 +46,9 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
|
||||
'uploader': '8TV',
|
||||
'description': 'md5:a950cc4285c43e44d763d036710cd9cd',
|
||||
'timestamp': 1368213670,
|
||||
'upload_date': '20130510',
|
||||
'uploader_id': '1589608506001',
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -57,6 +60,9 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges',
|
||||
'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.',
|
||||
'uploader': 'Oracle',
|
||||
'timestamp': 1344975024,
|
||||
'upload_date': '20120814',
|
||||
'uploader_id': '1460825906',
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -68,6 +74,9 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'title': 'This Bracelet Acts as a Personal Thermostat',
|
||||
'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
|
||||
'uploader': 'Mashable',
|
||||
'timestamp': 1382041798,
|
||||
'upload_date': '20131017',
|
||||
'uploader_id': '1130468786001',
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -85,14 +94,17 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
{
|
||||
# test flv videos served by akamaihd.net
|
||||
# From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william
|
||||
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3ABC2996102916001&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D',
|
||||
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3Aevent-stream-356&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D',
|
||||
# The md5 checksum changes on each download
|
||||
'info_dict': {
|
||||
'id': '2996102916001',
|
||||
'id': '3750436379001',
|
||||
'ext': 'flv',
|
||||
'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
|
||||
'uploader': 'Red Bull TV',
|
||||
'uploader': 'RBTV Old (do not use)',
|
||||
'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
|
||||
'timestamp': 1409122195,
|
||||
'upload_date': '20140827',
|
||||
'uploader_id': '710858724001',
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -106,6 +118,12 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'playlist_mincount': 7,
|
||||
},
|
||||
]
|
||||
FLV_VCODECS = {
|
||||
1: 'SORENSON',
|
||||
2: 'ON2',
|
||||
3: 'H264',
|
||||
4: 'VP8',
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _build_brighcove_url(cls, object_str):
|
||||
@@ -136,13 +154,16 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
else:
|
||||
flashvars = {}
|
||||
|
||||
data_url = object_doc.attrib.get('data', '')
|
||||
data_url_params = compat_parse_qs(compat_urllib_parse_urlparse(data_url).query)
|
||||
|
||||
def find_param(name):
|
||||
if name in flashvars:
|
||||
return flashvars[name]
|
||||
node = find_xpath_attr(object_doc, './param', 'name', name)
|
||||
if node is not None:
|
||||
return node.attrib['value']
|
||||
return None
|
||||
return data_url_params.get(name)
|
||||
|
||||
params = {}
|
||||
|
||||
@@ -286,15 +307,19 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
|
||||
|
||||
def _extract_video_info(self, video_info):
|
||||
publisher_id = video_info.get('publisherId')
|
||||
info = {
|
||||
'id': compat_str(video_info['id']),
|
||||
'title': video_info['displayName'].strip(),
|
||||
'description': video_info.get('shortDescription'),
|
||||
'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
|
||||
'uploader': video_info.get('publisherName'),
|
||||
'uploader_id': compat_str(publisher_id) if publisher_id else None,
|
||||
'duration': float_or_none(video_info.get('length'), 1000),
|
||||
'timestamp': int_or_none(video_info.get('creationDate'), 1000),
|
||||
}
|
||||
|
||||
renditions = video_info.get('renditions')
|
||||
renditions = video_info.get('renditions', []) + video_info.get('IOSRenditions', [])
|
||||
if renditions:
|
||||
formats = []
|
||||
for rend in renditions:
|
||||
@@ -315,19 +340,42 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
ext = 'flv'
|
||||
if ext is None:
|
||||
ext = determine_ext(url)
|
||||
size = rend.get('size')
|
||||
formats.append({
|
||||
tbr = int_or_none(rend.get('encodingRate'), 1000),
|
||||
a_format = {
|
||||
'format_id': 'http%s' % ('-%s' % tbr if tbr else ''),
|
||||
'url': url,
|
||||
'ext': ext,
|
||||
'height': rend.get('frameHeight'),
|
||||
'width': rend.get('frameWidth'),
|
||||
'filesize': size if size != 0 else None,
|
||||
})
|
||||
'filesize': int_or_none(rend.get('size')) or None,
|
||||
'tbr': tbr,
|
||||
}
|
||||
if rend.get('audioOnly'):
|
||||
a_format.update({
|
||||
'vcodec': 'none',
|
||||
})
|
||||
else:
|
||||
a_format.update({
|
||||
'height': int_or_none(rend.get('frameHeight')),
|
||||
'width': int_or_none(rend.get('frameWidth')),
|
||||
'vcodec': rend.get('videoCodec'),
|
||||
})
|
||||
|
||||
# m3u8 manifests with remote == false are media playlists
|
||||
# Not calling _extract_m3u8_formats here to save network traffic
|
||||
if ext == 'm3u8':
|
||||
a_format.update({
|
||||
'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''),
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8',
|
||||
})
|
||||
|
||||
formats.append(a_format)
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
elif video_info.get('FLVFullLengthURL') is not None:
|
||||
info.update({
|
||||
'url': video_info['FLVFullLengthURL'],
|
||||
'vcodec': self.FLV_VCODECS.get(video_info.get('FLVFullCodec')),
|
||||
'filesize': int_or_none(video_info.get('FLVFullSize')),
|
||||
})
|
||||
|
||||
if self._downloader.params.get('include_ads', False):
|
||||
@@ -383,6 +431,7 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
'formats': 'mincount:41',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
@@ -426,7 +475,7 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
</video>.*?
|
||||
<script[^>]+
|
||||
src=["\'](?:https?:)?//players\.brightcove\.net/
|
||||
(\d+)/([\da-f-]+)_([^/]+)/index(?:\.min)?\.js
|
||||
(\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js
|
||||
''', webpage):
|
||||
entries.append(
|
||||
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
|
||||
@@ -467,7 +516,7 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
raise ExtractorError(json_data[0]['message'], expected=True)
|
||||
raise
|
||||
|
||||
title = json_data['name']
|
||||
title = json_data['name'].strip()
|
||||
|
||||
formats = []
|
||||
for source in json_data.get('sources', []):
|
||||
@@ -520,7 +569,7 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
f.update({
|
||||
'url': src or streaming_src,
|
||||
'format_id': build_format_id('http' if src else 'http-streaming'),
|
||||
'preference': 2 if src else 1,
|
||||
'source_preference': 0 if src else -1,
|
||||
})
|
||||
else:
|
||||
f.update({
|
||||
@@ -531,20 +580,22 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = json_data.get('description')
|
||||
thumbnail = json_data.get('thumbnail')
|
||||
timestamp = parse_iso8601(json_data.get('published_at'))
|
||||
duration = float_or_none(json_data.get('duration'), 1000)
|
||||
tags = json_data.get('tags', [])
|
||||
subtitles = {}
|
||||
for text_track in json_data.get('text_tracks', []):
|
||||
if text_track.get('src'):
|
||||
subtitles.setdefault(text_track.get('srclang'), []).append({
|
||||
'url': text_track['src'],
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'description': json_data.get('description'),
|
||||
'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
|
||||
'duration': float_or_none(json_data.get('duration'), 1000),
|
||||
'timestamp': parse_iso8601(json_data.get('published_at')),
|
||||
'uploader_id': account_id,
|
||||
'formats': formats,
|
||||
'tags': tags,
|
||||
'subtitles': subtitles,
|
||||
'tags': json_data.get('tags', []),
|
||||
}
|
||||
|
||||
87
youtube_dl/extractor/camwithher.py
Normal file
87
youtube_dl/extractor/camwithher.py
Normal file
@@ -0,0 +1,87 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class CamWithHerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?camwithher\.tv/view_video\.php\?.*\bviewkey=(?P<id>\w+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://camwithher.tv/view_video.php?viewkey=6e9a24e2c0e842e1f177&page=&viewtype=&category=',
|
||||
'info_dict': {
|
||||
'id': '5644',
|
||||
'ext': 'flv',
|
||||
'title': 'Periscope Tease',
|
||||
'description': 'In the clouds teasing on periscope to my favorite song',
|
||||
'duration': 240,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'uploader': 'MileenaK',
|
||||
'upload_date': '20160322',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://camwithher.tv/view_video.php?viewkey=6dfd8b7c97531a459937',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://camwithher.tv/view_video.php?page=&viewkey=6e9a24e2c0e842e1f177&viewtype=&category=',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://camwithher.tv/view_video.php?viewkey=b6c3b5bea9515d1a1fc4&page=&viewtype=&category=mv',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
flv_id = self._html_search_regex(
|
||||
r'<a[^>]+href=["\']/download/\?v=(\d+)', webpage, 'video id')
|
||||
|
||||
# Video URL construction algorithm is reverse-engineered from cwhplayer.swf
|
||||
rtmp_url = 'rtmp://camwithher.tv/clipshare/%s' % (
|
||||
('mp4:%s.mp4' % flv_id) if int(flv_id) > 2010 else flv_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<div[^>]+style="float:left"[^>]*>\s*<h2>(.+?)</h2>', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'>Description:</span>(.+?)</div>', webpage, 'description', default=None)
|
||||
|
||||
runtime = self._search_regex(
|
||||
r'Runtime\s*:\s*(.+?) \|', webpage, 'duration', default=None)
|
||||
if runtime:
|
||||
runtime = re.sub(r'[\s-]', '', runtime)
|
||||
duration = parse_duration(runtime)
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'Views\s*:\s*(\d+)', webpage, 'view count', default=None))
|
||||
comment_count = int_or_none(self._search_regex(
|
||||
r'Comments\s*:\s*(\d+)', webpage, 'comment count', default=None))
|
||||
|
||||
uploader = self._search_regex(
|
||||
r'Added by\s*:\s*<a[^>]+>([^<]+)</a>', webpage, 'uploader', default=None)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'Added on\s*:\s*([\d-]+)', webpage, 'upload date', default=None))
|
||||
|
||||
return {
|
||||
'id': flv_id,
|
||||
'url': rtmp_url,
|
||||
'ext': 'flv',
|
||||
'no_resume': True,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
@@ -1,24 +1,41 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
xpath_text,
|
||||
xpath_element,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
class CBSIE(InfoExtractor):
|
||||
class CBSBaseIE(ThePlatformIE):
|
||||
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
||||
closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL')
|
||||
return {
|
||||
'en': [{
|
||||
'ext': 'ttml',
|
||||
'url': closed_caption_e.attrib['value'],
|
||||
}]
|
||||
} if closed_caption_e is not None and closed_caption_e.attrib.get('value') else []
|
||||
|
||||
|
||||
class CBSIE(CBSBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||
'info_dict': {
|
||||
'id': '4JUVEwq3wUT7',
|
||||
'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_',
|
||||
'display_id': 'connect-chat-feat-garth-brooks',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Connect Chat feat. Garth Brooks',
|
||||
'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
|
||||
'duration': 1495,
|
||||
'timestamp': 1385585425,
|
||||
'upload_date': '20131127',
|
||||
'uploader': 'CBSI-NEW',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
@@ -47,22 +64,46 @@ class CBSIE(InfoExtractor):
|
||||
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?manifest=m3u&mbr=true'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
request = sanitized_Request(url)
|
||||
# Android UA is served with higher quality (720p) streams (see
|
||||
# https://github.com/rg3/youtube-dl/issues/7490)
|
||||
request.add_header('User-Agent', 'Mozilla/5.0 (Linux; Android 4.4; Nexus 5)')
|
||||
webpage = self._download_webpage(request, display_id)
|
||||
real_id = self._search_regex(
|
||||
[r"video\.settings\.pid\s*=\s*'([^']+)';", r"cbsplayer\.pid\s*=\s*'([^']+)';"],
|
||||
webpage, 'real video ID')
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(
|
||||
'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true&manifest=m3u' % real_id,
|
||||
{'force_smil_url': True}),
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
content_id = self._search_regex(
|
||||
[r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
|
||||
webpage, 'content id')
|
||||
items_data = self._download_xml(
|
||||
'http://can.cbs.com/thunder/player/videoPlayerService.php',
|
||||
content_id, query={'partner': 'cbs', 'contentId': content_id})
|
||||
video_data = xpath_element(items_data, './/item')
|
||||
title = xpath_text(video_data, 'videoTitle', 'title', True)
|
||||
|
||||
subtitles = {}
|
||||
formats = []
|
||||
for item in items_data.findall('.//item'):
|
||||
pid = xpath_text(item, 'pid')
|
||||
if not pid:
|
||||
continue
|
||||
try:
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
self.TP_RELEASE_URL_TEMPLATE % pid, content_id, 'Downloading %s SMIL data' % pid)
|
||||
except ExtractorError:
|
||||
continue
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = self.get_metadata('dJ5BDC/media/guid/2198311517/%s' % content_id, content_id)
|
||||
info.update({
|
||||
'id': content_id,
|
||||
'display_id': display_id,
|
||||
}
|
||||
'title': title,
|
||||
'series': xpath_text(video_data, 'seriesTitle'),
|
||||
'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
|
||||
'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
|
||||
'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
|
||||
'thumbnail': xpath_text(video_data, 'previewImageURL'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
return info
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class CNETIE(ThePlatformIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
|
||||
class CBSInteractiveIE(ThePlatformIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video/share)/(?P<id>[^/?]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
|
||||
'info_dict': {
|
||||
@@ -17,6 +19,8 @@ class CNETIE(ThePlatformIE):
|
||||
'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
|
||||
'uploader': 'Sarah Mitroff',
|
||||
'duration': 70,
|
||||
'timestamp': 1396479627,
|
||||
'upload_date': '20140402',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
|
||||
@@ -28,15 +32,38 @@ class CNETIE(ThePlatformIE):
|
||||
'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
|
||||
'uploader': 'Ashley Esqueda',
|
||||
'duration': 1482,
|
||||
'timestamp': 1433289889,
|
||||
'upload_date': '20150603',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.zdnet.com/video/share/video-keeping-android-smartphones-and-tablets-secure/',
|
||||
'info_dict': {
|
||||
'id': 'bc1af9f0-a2b5-4e54-880d-0d95525781c0',
|
||||
'ext': 'mp4',
|
||||
'title': 'Video: Keeping Android smartphones and tablets secure',
|
||||
'description': 'Here\'s the best way to keep Android devices secure, and what you do when they\'ve come to the end of their lives.',
|
||||
'uploader_id': 'f2d97ea2-8175-11e2-9d12-0018fe8a00b0',
|
||||
'uploader': 'Adrian Kingsley-Hughes',
|
||||
'timestamp': 1448961720,
|
||||
'upload_date': '20151201',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}]
|
||||
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/kYEXFC/%s?mbr=true'
|
||||
MPX_ACCOUNTS = {
|
||||
'cnet': 2288573011,
|
||||
'zdnet': 2387448114,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
site, display_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
data_json = self._html_search_regex(
|
||||
r"data-cnet-video(?:-uvp)?-options='([^']+)'",
|
||||
r"data-(?:cnet|zdnet)-video(?:-uvp)?-options='([^']+)'",
|
||||
webpage, 'data json')
|
||||
data = self._parse_json(data_json, display_id)
|
||||
vdata = data.get('video') or data['videos'][0]
|
||||
@@ -51,16 +78,15 @@ class CNETIE(ThePlatformIE):
|
||||
uploader = None
|
||||
uploader_id = None
|
||||
|
||||
metadata = self.get_metadata('kYEXFC/%s' % list(vdata['files'].values())[0], video_id)
|
||||
description = vdata.get('description') or metadata.get('description')
|
||||
duration = int_or_none(vdata.get('duration')) or metadata.get('duration')
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
media_guid_path = 'media/guid/%d/%s' % (self.MPX_ACCOUNTS[site], vdata['mpxRefId'])
|
||||
formats, subtitles = [], {}
|
||||
if site == 'cnet':
|
||||
formats, subtitles = self._extract_theplatform_smil(
|
||||
self.TP_RELEASE_URL_TEMPLATE % media_guid_path, video_id)
|
||||
for (fkey, vid) in vdata['files'].items():
|
||||
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
|
||||
continue
|
||||
release_url = 'http://link.theplatform.com/s/kYEXFC/%s?mbr=true' % vid
|
||||
release_url = self.TP_RELEASE_URL_TEMPLATE % vid
|
||||
if fkey == 'hds':
|
||||
release_url += '&manifest=f4m'
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
|
||||
@@ -68,15 +94,15 @@ class CNETIE(ThePlatformIE):
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
info = self.get_metadata('kYEXFC/%s' % media_guid_path, video_id)
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': metadata.get('thumbnail'),
|
||||
'duration': duration,
|
||||
'duration': int_or_none(vdata.get('duration')),
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
})
|
||||
return info
|
||||
@@ -2,14 +2,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformIE
|
||||
from .cbs import CBSBaseIE
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
find_xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
class CBSNewsIE(ThePlatformIE):
|
||||
class CBSNewsIE(CBSBaseIE):
|
||||
IE_DESC = 'CBS News'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
|
||||
|
||||
@@ -49,15 +48,6 @@ class CBSNewsIE(ThePlatformIE):
|
||||
},
|
||||
]
|
||||
|
||||
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
||||
closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL')
|
||||
return {
|
||||
'en': [{
|
||||
'ext': 'ttml',
|
||||
'url': closed_caption_e.attrib['value'],
|
||||
}]
|
||||
} if closed_caption_e is not None and closed_caption_e.attrib.get('value') else []
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@@ -122,6 +112,7 @@ class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
for entry in f4m_formats:
|
||||
# URLs without the extra param induce an 404 error
|
||||
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
||||
self._sort_formats(f4m_formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -48,6 +48,7 @@ class ChaturbateIE(InfoExtractor):
|
||||
raise ExtractorError('Unable to find stream URL')
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
36
youtube_dl/extractor/cnbc.py
Normal file
36
youtube_dl/extractor/cnbc.py
Normal file
@@ -0,0 +1,36 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class CNBCIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://video.cnbc.com/gallery/?video=3000503714',
|
||||
'info_dict': {
|
||||
'id': '3000503714',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fighting zombies is big business',
|
||||
'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e',
|
||||
'timestamp': 1459332000,
|
||||
'upload_date': '20160330',
|
||||
'uploader': 'NBCU-CNBC',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(
|
||||
'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u' % video_id,
|
||||
{'force_smil_url': True}),
|
||||
'id': video_id,
|
||||
}
|
||||
@@ -41,7 +41,13 @@ class ComCarCoffIE(InfoExtractor):
|
||||
|
||||
display_id = full_data['activeVideo']['video']
|
||||
video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id]
|
||||
|
||||
video_id = compat_str(video_data['mediaId'])
|
||||
title = video_data['title']
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_data['mediaUrl'], video_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = [{
|
||||
'url': video_data['images']['thumb'],
|
||||
}, {
|
||||
@@ -54,15 +60,14 @@ class ComCarCoffIE(InfoExtractor):
|
||||
video_data.get('duration'))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'crackle:%s' % video_id,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': video_data['title'],
|
||||
'title': title,
|
||||
'description': video_data.get('description'),
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
'season_number': int_or_none(video_data.get('season')),
|
||||
'episode_number': int_or_none(video_data.get('episode')),
|
||||
'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
|
||||
|
||||
@@ -22,8 +22,10 @@ from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..downloader.f4m import remove_encrypted_media
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
age_restricted,
|
||||
@@ -48,6 +50,7 @@ from ..utils import (
|
||||
determine_protocol,
|
||||
parse_duration,
|
||||
mimetype2ext,
|
||||
update_Request,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
@@ -346,7 +349,7 @@ class InfoExtractor(object):
|
||||
def IE_NAME(self):
|
||||
return compat_str(type(self).__name__[:-2])
|
||||
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None):
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
|
||||
""" Returns the response handle """
|
||||
if note is None:
|
||||
self.report_download_webpage(video_id)
|
||||
@@ -356,11 +359,14 @@ class InfoExtractor(object):
|
||||
else:
|
||||
self.to_screen('%s: %s' % (video_id, note))
|
||||
# data, headers and query params will be ignored for `Request` objects
|
||||
if isinstance(url_or_request, compat_str):
|
||||
if isinstance(url_or_request, compat_urllib_request.Request):
|
||||
url_or_request = update_Request(
|
||||
url_or_request, data=data, headers=headers, query=query)
|
||||
else:
|
||||
if query:
|
||||
url_or_request = update_url_query(url_or_request, query)
|
||||
if data or headers:
|
||||
url_or_request = sanitized_Request(url_or_request, data, headers or {})
|
||||
url_or_request = sanitized_Request(url_or_request, data, headers)
|
||||
try:
|
||||
return self._downloader.urlopen(url_or_request)
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
@@ -376,7 +382,7 @@ class InfoExtractor(object):
|
||||
self._downloader.report_warning(errmsg)
|
||||
return False
|
||||
|
||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers=None, query=None):
|
||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}):
|
||||
""" Returns a tuple (page content as string, URL handle) """
|
||||
# Strip hashes from the URL (#1038)
|
||||
if isinstance(url_or_request, (compat_str, str)):
|
||||
@@ -469,7 +475,7 @@ class InfoExtractor(object):
|
||||
|
||||
return content
|
||||
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers=None, query=None):
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers={}, query={}):
|
||||
""" Returns the data of the page as a string """
|
||||
success = False
|
||||
try_count = 0
|
||||
@@ -490,7 +496,7 @@ class InfoExtractor(object):
|
||||
|
||||
def _download_xml(self, url_or_request, video_id,
|
||||
note='Downloading XML', errnote='Unable to download XML',
|
||||
transform_source=None, fatal=True, encoding=None, data=None, headers=None, query=None):
|
||||
transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}):
|
||||
"""Return the xml as an xml.etree.ElementTree.Element"""
|
||||
xml_string = self._download_webpage(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query)
|
||||
@@ -504,7 +510,7 @@ class InfoExtractor(object):
|
||||
note='Downloading JSON metadata',
|
||||
errnote='Unable to download JSON metadata',
|
||||
transform_source=None,
|
||||
fatal=True, encoding=None, data=None, headers=None, query=None):
|
||||
fatal=True, encoding=None, data=None, headers={}, query={}):
|
||||
json_string = self._download_webpage(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal,
|
||||
encoding=encoding, data=data, headers=headers, query=query)
|
||||
@@ -989,6 +995,11 @@ class InfoExtractor(object):
|
||||
if not media_nodes:
|
||||
manifest_version = '2.0'
|
||||
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
|
||||
# Remove unsupported DRM protected media from final formats
|
||||
# rendition (see https://github.com/rg3/youtube-dl/issues/8573).
|
||||
media_nodes = remove_encrypted_media(media_nodes)
|
||||
if not media_nodes:
|
||||
return formats
|
||||
base_url = xpath_text(
|
||||
manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
|
||||
'base URL', default=None)
|
||||
@@ -1021,8 +1032,6 @@ class InfoExtractor(object):
|
||||
'height': int_or_none(media_el.attrib.get('height')),
|
||||
'preference': preference,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return formats
|
||||
|
||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||
@@ -1143,7 +1152,6 @@ class InfoExtractor(object):
|
||||
last_media = None
|
||||
formats.append(f)
|
||||
last_info = {}
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
@staticmethod
|
||||
@@ -1317,8 +1325,6 @@ class InfoExtractor(object):
|
||||
})
|
||||
continue
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return formats
|
||||
|
||||
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
||||
@@ -1329,7 +1335,7 @@ class InfoExtractor(object):
|
||||
if not src or src in urls:
|
||||
continue
|
||||
urls.append(src)
|
||||
ext = textstream.get('ext') or determine_ext(src) or mimetype2ext(textstream.get('type'))
|
||||
ext = textstream.get('ext') or mimetype2ext(textstream.get('type')) or determine_ext(src)
|
||||
lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': src,
|
||||
@@ -1509,9 +1515,16 @@ class InfoExtractor(object):
|
||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||
media_template = representation_ms_info['media_template']
|
||||
media_template = media_template.replace('$RepresentationID$', representation_id)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth)(?:%(0\d+)d)?\$', r'%(\1)\2d', media_template)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth)%(\d+)\$', r'%(\1)\2d', media_template)
|
||||
media_template.replace('$$', '$')
|
||||
representation_ms_info['segment_urls'] = [media_template % {'Number': segment_number, 'Bandwidth': representation_attrib.get('bandwidth')} for segment_number in range(representation_ms_info['start_number'], representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||
representation_ms_info['segment_urls'] = [
|
||||
media_template % {
|
||||
'Number': segment_number,
|
||||
'Bandwidth': representation_attrib.get('bandwidth')}
|
||||
for segment_number in range(
|
||||
representation_ms_info['start_number'],
|
||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||
if 'segment_urls' in representation_ms_info:
|
||||
f.update({
|
||||
'segment_urls': representation_ms_info['segment_urls'],
|
||||
@@ -1536,7 +1549,6 @@ class InfoExtractor(object):
|
||||
existing_format.update(f)
|
||||
else:
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
def _live_title(self, name):
|
||||
|
||||
@@ -57,6 +57,7 @@ class CWTVIE(InfoExtractor):
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_data['videos']['variantplaylist']['uri'], video_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = [{
|
||||
'url': image['uri'],
|
||||
|
||||
@@ -41,7 +41,9 @@ class DeezerPlaylistIE(InfoExtractor):
|
||||
'Deezer said: %s' % geoblocking_msg, expected=True)
|
||||
|
||||
data_json = self._search_regex(
|
||||
r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n', webpage, 'data JSON')
|
||||
(r'__DZR_APP_STATE__\s*=\s*({.+?})\s*</script>',
|
||||
r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n'),
|
||||
webpage, 'data JSON')
|
||||
data = json.loads(data_json)
|
||||
|
||||
playlist_title = data.get('DATA', {}).get('TITLE')
|
||||
|
||||
@@ -38,6 +38,7 @@ class DFBIE(InfoExtractor):
|
||||
token_el = f4m_info.find('token')
|
||||
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
|
||||
formats = self._extract_f4m_formats(manifest_url, display_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -63,18 +63,23 @@ class DiscoveryIE(InfoExtractor):
|
||||
|
||||
video_title = info.get('playlist_title') or info.get('video_title')
|
||||
|
||||
entries = [{
|
||||
'id': compat_str(video_info['id']),
|
||||
'formats': self._extract_m3u8_formats(
|
||||
entries = []
|
||||
|
||||
for idx, video_info in enumerate(info['playlist']):
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls',
|
||||
note='Download m3u8 information for video %d' % (idx + 1)),
|
||||
'title': video_info['title'],
|
||||
'description': video_info.get('description'),
|
||||
'duration': parse_duration(video_info.get('video_length')),
|
||||
'webpage_url': video_info.get('href') or video_info.get('url'),
|
||||
'thumbnail': video_info.get('thumbnailURL'),
|
||||
'alt_title': video_info.get('secondary_title'),
|
||||
'timestamp': parse_iso8601(video_info.get('publishedDate')),
|
||||
} for idx, video_info in enumerate(info['playlist'])]
|
||||
note='Download m3u8 information for video %d' % (idx + 1))
|
||||
self._sort_formats(formats)
|
||||
entries.append({
|
||||
'id': compat_str(video_info['id']),
|
||||
'formats': formats,
|
||||
'title': video_info['title'],
|
||||
'description': video_info.get('description'),
|
||||
'duration': parse_duration(video_info.get('video_length')),
|
||||
'webpage_url': video_info.get('href') or video_info.get('url'),
|
||||
'thumbnail': video_info.get('thumbnailURL'),
|
||||
'alt_title': video_info.get('secondary_title'),
|
||||
'timestamp': parse_iso8601(video_info.get('publishedDate')),
|
||||
})
|
||||
|
||||
return self.playlist_result(entries, display_id, video_title)
|
||||
|
||||
@@ -118,6 +118,8 @@ class DPlayIE(InfoExtractor):
|
||||
if info.get(protocol):
|
||||
extract_formats(protocol, info[protocol])
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
|
||||
@@ -39,13 +39,13 @@ class DWIE(InfoExtractor):
|
||||
hidden_inputs = self._hidden_inputs(webpage)
|
||||
title = hidden_inputs['media_title']
|
||||
|
||||
formats = []
|
||||
if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1':
|
||||
formats = self._extract_smil_formats(
|
||||
'http://www.dw.com/smil/v-%s' % media_id, media_id,
|
||||
transform_source=lambda s: s.replace(
|
||||
'rtmp://tv-od.dw.de/flash/',
|
||||
'http://tv-download.dw.de/dwtv_video/flv/'))
|
||||
self._sort_formats(formats)
|
||||
else:
|
||||
formats = [{'url': hidden_inputs['file_name']}]
|
||||
|
||||
|
||||
@@ -16,6 +16,9 @@ class FOXIE(InfoExtractor):
|
||||
'title': 'Official Trailer: Gotham',
|
||||
'description': 'Tracing the rise of the great DC Comics Super-Villains and vigilantes, Gotham reveals an entirely new chapter that has never been told.',
|
||||
'duration': 129,
|
||||
'timestamp': 1400020798,
|
||||
'upload_date': '20140513',
|
||||
'uploader': 'NEWA-FNG-FOXCOM',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}
|
||||
|
||||
@@ -18,8 +18,8 @@ class FoxNewsIE(AMPIE):
|
||||
'title': 'Frozen in Time',
|
||||
'description': '16-year-old girl is size of toddler',
|
||||
'duration': 265,
|
||||
# 'timestamp': 1304411491,
|
||||
# 'upload_date': '20110503',
|
||||
'timestamp': 1304411491,
|
||||
'upload_date': '20110503',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
@@ -32,8 +32,8 @@ class FoxNewsIE(AMPIE):
|
||||
'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
|
||||
'description': "Congressman discusses president's plan",
|
||||
'duration': 292,
|
||||
# 'timestamp': 1417662047,
|
||||
# 'upload_date': '20141204',
|
||||
'timestamp': 1417662047,
|
||||
'upload_date': '20141204',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
|
||||
@@ -406,19 +406,6 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# multiple ooyala embeds on SBN network websites
|
||||
{
|
||||
'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
|
||||
'info_dict': {
|
||||
'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
|
||||
'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
},
|
||||
# embed.ly video
|
||||
{
|
||||
'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
|
||||
@@ -1124,7 +1111,23 @@ class GenericIE(InfoExtractor):
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
},
|
||||
# Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
|
||||
# This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
|
||||
{
|
||||
'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
|
||||
'info_dict': {
|
||||
'id': '4785848093001',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Cardinal Pell Interview',
|
||||
'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
|
||||
'uploader': 'GlobeCast Australia - GlobeStream',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -1294,6 +1297,7 @@ class GenericIE(InfoExtractor):
|
||||
'vcodec': 'none' if m.group('type') == 'audio' else None
|
||||
}]
|
||||
info_dict['direct'] = True
|
||||
self._sort_formats(formats)
|
||||
info_dict['formats'] = formats
|
||||
return info_dict
|
||||
|
||||
@@ -1320,6 +1324,7 @@ class GenericIE(InfoExtractor):
|
||||
# Is it an M3U playlist?
|
||||
if first_bytes.startswith(b'#EXTM3U'):
|
||||
info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
|
||||
self._sort_formats(info_dict['formats'])
|
||||
return info_dict
|
||||
|
||||
# Maybe it's a direct link to a video?
|
||||
@@ -1344,15 +1349,19 @@ class GenericIE(InfoExtractor):
|
||||
if doc.tag == 'rss':
|
||||
return self._extract_rss(url, video_id, doc)
|
||||
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
|
||||
return self._parse_smil(doc, url, video_id)
|
||||
smil = self._parse_smil(doc, url, video_id)
|
||||
self._sort_formats(smil['formats'])
|
||||
return smil
|
||||
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
|
||||
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
||||
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||
info_dict['formats'] = self._parse_mpd_formats(
|
||||
doc, video_id, mpd_base_url=url.rpartition('/')[0])
|
||||
self._sort_formats(info_dict['formats'])
|
||||
return info_dict
|
||||
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
|
||||
info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
|
||||
self._sort_formats(info_dict['formats'])
|
||||
return info_dict
|
||||
except compat_xml_parse_error:
|
||||
pass
|
||||
@@ -2037,6 +2046,9 @@ class GenericIE(InfoExtractor):
|
||||
else:
|
||||
entry_info_dict['url'] = video_url
|
||||
|
||||
if entry_info_dict.get('formats'):
|
||||
self._sort_formats(entry_info_dict['formats'])
|
||||
|
||||
entries.append(entry_info_dict)
|
||||
|
||||
if len(entries) == 1:
|
||||
|
||||
@@ -6,6 +6,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
unescapeHTML,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -39,7 +40,7 @@ class HowStuffWorksIE(InfoExtractor):
|
||||
'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',
|
||||
'info_dict': {
|
||||
'id': '440011',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sword Swallowing #1 by Dan Meyer',
|
||||
'description': 'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>',
|
||||
'display_id': 'sword-swallowing-1-by-dan-meyer',
|
||||
@@ -63,13 +64,19 @@ class HowStuffWorksIE(InfoExtractor):
|
||||
video_id = clip_info['content_id']
|
||||
formats = []
|
||||
m3u8_url = clip_info.get('m3u8')
|
||||
if m3u8_url:
|
||||
formats += self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
|
||||
if m3u8_url and determine_ext(m3u8_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', format_id='hls', fatal=True))
|
||||
flv_url = clip_info.get('flv_url')
|
||||
if flv_url:
|
||||
formats.append({
|
||||
'url': flv_url,
|
||||
'format_id': 'flv',
|
||||
})
|
||||
for video in clip_info.get('mp4', []):
|
||||
formats.append({
|
||||
'url': video['src'],
|
||||
'format_id': video['bitrate'],
|
||||
'vbr': int(video['bitrate'].rstrip('k')),
|
||||
'format_id': 'mp4-%s' % video['bitrate'],
|
||||
'vbr': int_or_none(video['bitrate'].rstrip('k')),
|
||||
})
|
||||
|
||||
if not formats:
|
||||
@@ -102,6 +109,6 @@ class HowStuffWorksIE(InfoExtractor):
|
||||
'title': unescapeHTML(clip_info['clip_title']),
|
||||
'description': unescapeHTML(clip_info.get('caption')),
|
||||
'thumbnail': clip_info.get('video_still_url'),
|
||||
'duration': clip_info.get('duration'),
|
||||
'duration': int_or_none(clip_info.get('duration')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -152,7 +152,7 @@ class InstagramUserIE(InfoExtractor):
|
||||
|
||||
if not page['items']:
|
||||
break
|
||||
max_id = page['items'][-1]['id']
|
||||
max_id = page['items'][-1]['id'].split('_')[0]
|
||||
media_url = (
|
||||
'http://instagram.com/%s/media?max_id=%s' % (
|
||||
uploader_id, max_id))
|
||||
|
||||
@@ -368,7 +368,10 @@ class IqiyiIE(InfoExtractor):
|
||||
auth_req, video_id,
|
||||
note='Downloading video authentication JSON',
|
||||
errnote='Unable to download video authentication JSON')
|
||||
if auth_result['code'] == 'Q00506': # requires a VIP account
|
||||
|
||||
if auth_result['code'] == 'Q00505': # No preview available (不允许试看鉴权失败)
|
||||
raise ExtractorError('This video requires a VIP account', expected=True)
|
||||
if auth_result['code'] == 'Q00506': # End of preview time (试看结束鉴权失败)
|
||||
if do_report_warning:
|
||||
self.report_warning('Needs a VIP account for full video')
|
||||
return False
|
||||
|
||||
@@ -26,10 +26,23 @@ class KuwoBaseIE(InfoExtractor):
|
||||
def _get_formats(self, song_id, tolerate_ip_deny=False):
|
||||
formats = []
|
||||
for file_format in self._FORMATS:
|
||||
headers = {}
|
||||
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
|
||||
if cn_verification_proxy:
|
||||
headers['Ytdl-request-proxy'] = cn_verification_proxy
|
||||
|
||||
query = {
|
||||
'format': file_format['ext'],
|
||||
'br': file_format.get('br', ''),
|
||||
'rid': 'MUSIC_%s' % song_id,
|
||||
'type': 'convert_url',
|
||||
'response': 'url'
|
||||
}
|
||||
|
||||
song_url = self._download_webpage(
|
||||
'http://antiserver.kuwo.cn/anti.s?format=%s&br=%s&rid=MUSIC_%s&type=convert_url&response=url' %
|
||||
(file_format['ext'], file_format.get('br', ''), song_id),
|
||||
'http://antiserver.kuwo.cn/anti.s',
|
||||
song_id, note='Download %s url info' % file_format['format'],
|
||||
query=query, headers=headers,
|
||||
)
|
||||
|
||||
if song_url == 'IPDeny' and not tolerate_ip_deny:
|
||||
@@ -44,18 +57,13 @@ class KuwoBaseIE(InfoExtractor):
|
||||
'abr': file_format.get('abr'),
|
||||
})
|
||||
|
||||
# XXX _sort_formats fails if there are not formats, while it's not the
|
||||
# desired behavior if 'IPDeny' is ignored
|
||||
# This check can be removed if https://github.com/rg3/youtube-dl/pull/8051 is merged
|
||||
if not tolerate_ip_deny:
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
|
||||
class KuwoIE(KuwoBaseIE):
|
||||
IE_NAME = 'kuwo:song'
|
||||
IE_DESC = '酷我音乐'
|
||||
_VALID_URL = r'https?://www\.kuwo\.cn/yinyue/(?P<id>\d+?)'
|
||||
_VALID_URL = r'https?://www\.kuwo\.cn/yinyue/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.kuwo.cn/yinyue/635632/',
|
||||
'info_dict': {
|
||||
@@ -103,6 +111,7 @@ class KuwoIE(KuwoBaseIE):
|
||||
lrc_content = None
|
||||
|
||||
formats = self._get_formats(song_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
album_id = self._html_search_regex(
|
||||
r'<p[^>]+class="album"[^<]+<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"',
|
||||
|
||||
@@ -130,6 +130,7 @@ class Laola1TvIE(InfoExtractor):
|
||||
formats = self._extract_f4m_formats(
|
||||
'%s?hdnea=%s&hdcore=3.2.0' % (token_attrib['url'], token_auth),
|
||||
video_id, f4m_id='hds')
|
||||
self._sort_formats(formats)
|
||||
|
||||
categories_str = _v('meta_sports')
|
||||
categories = categories_str.split(',') if categories_str else []
|
||||
|
||||
@@ -37,6 +37,7 @@ class LRTIE(InfoExtractor):
|
||||
r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*location\.hash\.substring\(1\)',
|
||||
webpage, 'm3u8 url', group='url')
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
@@ -28,8 +28,8 @@ class LyndaBaseIE(InfoExtractor):
|
||||
return
|
||||
|
||||
login_form = {
|
||||
'username': username.encode('utf-8'),
|
||||
'password': password.encode('utf-8'),
|
||||
'username': username,
|
||||
'password': password,
|
||||
'remember': 'false',
|
||||
'stayPut': 'false'
|
||||
}
|
||||
@@ -219,7 +219,7 @@ class LyndaCourseIE(LyndaBaseIE):
|
||||
'Course %s does not exist' % course_id, expected=True)
|
||||
|
||||
unaccessible_videos = 0
|
||||
videos = []
|
||||
entries = []
|
||||
|
||||
# Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
|
||||
# by single video API anymore
|
||||
@@ -229,20 +229,22 @@ class LyndaCourseIE(LyndaBaseIE):
|
||||
if video.get('HasAccess') is False:
|
||||
unaccessible_videos += 1
|
||||
continue
|
||||
if video.get('ID'):
|
||||
videos.append(video['ID'])
|
||||
video_id = video.get('ID')
|
||||
if video_id:
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'url': 'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
|
||||
'ie_key': LyndaIE.ie_key(),
|
||||
'chapter': chapter.get('Title'),
|
||||
'chapter_number': int_or_none(chapter.get('ChapterIndex')),
|
||||
'chapter_id': compat_str(chapter.get('ID')),
|
||||
})
|
||||
|
||||
if unaccessible_videos > 0:
|
||||
self._downloader.report_warning(
|
||||
'%s videos are only available for members (or paid members) and will not be downloaded. '
|
||||
% unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT)
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
|
||||
'Lynda')
|
||||
for video_id in videos]
|
||||
|
||||
course_title = course.get('Title')
|
||||
|
||||
return self.playlist_result(entries, course_id, course_title)
|
||||
|
||||
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
class MailRuIE(InfoExtractor):
|
||||
IE_NAME = 'mailru'
|
||||
IE_DESC = 'Видео@Mail.Ru'
|
||||
_VALID_URL = r'https?://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'
|
||||
_VALID_URL = r'https?://(?:(?:www|m)\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -61,6 +61,10 @@ class MailRuIE(InfoExtractor):
|
||||
'duration': 6001,
|
||||
},
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
},
|
||||
{
|
||||
'url': 'http://m.my.mail.ru/mail/3sktvtr/video/_myvideo/138.html',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
@@ -47,6 +47,7 @@ class MatchTVIE(InfoExtractor):
|
||||
video_url = self._download_json(request, video_id)['data']['videoUrl']
|
||||
f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
|
||||
formats = self._extract_f4m_formats(f4m_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._live_title('Матч ТВ - Прямой эфир'),
|
||||
|
||||
@@ -67,6 +67,7 @@ class MiTeleIE(InfoExtractor):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
|
||||
display_id, f4m_id=loc))
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._search_regex(
|
||||
r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', webpage, 'title')
|
||||
|
||||
@@ -2,39 +2,48 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import sanitized_Request
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class MovieClipsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www.)?movieclips\.com/videos/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www.)?movieclips\.com/videos/.+-(?P<id>\d+)(?:\?|$)'
|
||||
_TEST = {
|
||||
'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597?autoPlay=true&playlistId=5',
|
||||
'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597',
|
||||
'md5': '42b5a0352d4933a7bd54f2104f481244',
|
||||
'info_dict': {
|
||||
'id': 'pKIGmG83AqD9',
|
||||
'display_id': 'warcraft-trailer-1-561180739597',
|
||||
'ext': 'mp4',
|
||||
'title': 'Warcraft Trailer 1',
|
||||
'description': 'Watch Trailer 1 from Warcraft (2016). Legendary’s WARCRAFT is a 3D epic adventure of world-colliding conflict based.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'timestamp': 1446843055,
|
||||
'upload_date': '20151106',
|
||||
'uploader': 'Movieclips',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
req = sanitized_Request(url)
|
||||
# it doesn't work if it thinks the browser it's too old
|
||||
req.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/43.0 (Chrome)')
|
||||
webpage = self._download_webpage(req, display_id)
|
||||
theplatform_link = self._html_search_regex(r'src="(http://player.theplatform.com/p/.*?)"', webpage, 'theplatform link')
|
||||
title = self._html_search_regex(r'<title[^>]*>([^>]+)-\s*\d+\s*|\s*Movieclips.com</title>', webpage, 'title')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video = next(v for v in self._parse_json(self._search_regex(
|
||||
r'var\s+__REACT_ENGINE__\s*=\s*({.+});',
|
||||
webpage, 'react engine'), video_id)['playlist']['videos'] if v['id'] == video_id)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': theplatform_link,
|
||||
'title': title,
|
||||
'display_id': display_id,
|
||||
'description': description,
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(update_url_query(
|
||||
video['contentUrl'], {'mbr': 'true'}), {'force_smil_url': True}),
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._html_search_meta('description', webpage),
|
||||
'duration': float_or_none(video.get('duration')),
|
||||
'timestamp': parse_iso8601(video.get('dateCreated')),
|
||||
'thumbnail': video.get('defaultImage'),
|
||||
'uploader': video.get('provider'),
|
||||
}
|
||||
|
||||
@@ -2,13 +2,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class MySpaceIE(InfoExtractor):
|
||||
@@ -24,6 +24,8 @@ class MySpaceIE(InfoExtractor):
|
||||
'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
|
||||
'uploader': 'Five Minutes to the Stage',
|
||||
'uploader_id': 'fiveminutestothestage',
|
||||
'timestamp': 1414108751,
|
||||
'upload_date': '20141023',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
@@ -64,7 +66,7 @@ class MySpaceIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Starset - First Light',
|
||||
'description': 'md5:2d5db6c9d11d527683bcda818d332414',
|
||||
'uploader': 'Jacob Soren',
|
||||
'uploader': 'Yumi K',
|
||||
'uploader_id': 'SorenPromotions',
|
||||
'upload_date': '20140725',
|
||||
}
|
||||
@@ -78,6 +80,19 @@ class MySpaceIE(InfoExtractor):
|
||||
player_url = self._search_regex(
|
||||
r'playerSwf":"([^"?]*)', webpage, 'player URL')
|
||||
|
||||
def rtmp_format_from_stream_url(stream_url, width=None, height=None):
|
||||
rtmp_url, play_path = stream_url.split(';', 1)
|
||||
return {
|
||||
'format_id': 'rtmp',
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
'player_url': player_url,
|
||||
'protocol': 'rtmp',
|
||||
'ext': 'flv',
|
||||
'width': width,
|
||||
'height': height,
|
||||
}
|
||||
|
||||
if mobj.group('mediatype').startswith('music/song'):
|
||||
# songs don't store any useful info in the 'context' variable
|
||||
song_data = self._search_regex(
|
||||
@@ -93,8 +108,8 @@ class MySpaceIE(InfoExtractor):
|
||||
return self._search_regex(
|
||||
r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
|
||||
song_data, name, default='', group='data')
|
||||
streamUrl = search_data('stream-url')
|
||||
if not streamUrl:
|
||||
stream_url = search_data('stream-url')
|
||||
if not stream_url:
|
||||
vevo_id = search_data('vevo-id')
|
||||
youtube_id = search_data('youtube-id')
|
||||
if vevo_id:
|
||||
@@ -106,36 +121,47 @@ class MySpaceIE(InfoExtractor):
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'Found song but don\'t know how to download it')
|
||||
info = {
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'uploader': search_data('artist-name'),
|
||||
'uploader_id': search_data('artist-username'),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'duration': int_or_none(search_data('duration')),
|
||||
'formats': [rtmp_format_from_stream_url(stream_url)]
|
||||
}
|
||||
else:
|
||||
context = json.loads(self._search_regex(
|
||||
r'context = ({.*?});', webpage, 'context'))
|
||||
video = context['video']
|
||||
streamUrl = video['streamUrl']
|
||||
info = {
|
||||
'id': compat_str(video['mediaId']),
|
||||
video = self._parse_json(self._search_regex(
|
||||
r'context = ({.*?});', webpage, 'context'),
|
||||
video_id)['video']
|
||||
formats = []
|
||||
hls_stream_url = video.get('hlsStreamUrl')
|
||||
if hls_stream_url:
|
||||
formats.append({
|
||||
'format_id': 'hls',
|
||||
'url': hls_stream_url,
|
||||
'protocol': 'm3u8_native',
|
||||
'ext': 'mp4',
|
||||
})
|
||||
stream_url = video.get('streamUrl')
|
||||
if stream_url:
|
||||
formats.append(rtmp_format_from_stream_url(
|
||||
stream_url,
|
||||
int_or_none(video.get('width')),
|
||||
int_or_none(video.get('height'))))
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video['title'],
|
||||
'description': video['description'],
|
||||
'thumbnail': video['imageUrl'],
|
||||
'uploader': video['artistName'],
|
||||
'uploader_id': video['artistUsername'],
|
||||
'description': video.get('description'),
|
||||
'thumbnail': video.get('imageUrl'),
|
||||
'uploader': video.get('artistName'),
|
||||
'uploader_id': video.get('artistUsername'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'timestamp': parse_iso8601(video.get('dateAdded')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
rtmp_url, play_path = streamUrl.split(';', 1)
|
||||
info.update({
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
'player_url': player_url,
|
||||
'ext': 'flv',
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class MySpaceAlbumIE(InfoExtractor):
|
||||
IE_NAME = 'MySpace:album'
|
||||
|
||||
@@ -4,30 +4,40 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
url_basename,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class NationalGeographicIE(InfoExtractor):
|
||||
IE_NAME = 'natgeo'
|
||||
_VALID_URL = r'https?://video\.nationalgeographic\.com/.*?'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
|
||||
'md5': '730855d559abbad6b42c2be1fa584917',
|
||||
'info_dict': {
|
||||
'id': '4DmDACA6Qtk_',
|
||||
'ext': 'flv',
|
||||
'id': '0000014b-70a1-dd8c-af7f-f7b559330001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mating Crabs Busted by Sharks',
|
||||
'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
|
||||
'timestamp': 1423523799,
|
||||
'upload_date': '20150209',
|
||||
'uploader': 'NAGS',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
},
|
||||
{
|
||||
'url': 'http://video.nationalgeographic.com/wild/when-sharks-attack/the-real-jaws',
|
||||
'md5': '6a3105eb448c070503b3105fb9b320b5',
|
||||
'info_dict': {
|
||||
'id': '_JeBD_D7PlS5',
|
||||
'ext': 'flv',
|
||||
'id': 'ngc-I0IauNSWznb_UV008GxSbwY35BZvgi2e',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Real Jaws',
|
||||
'description': 'md5:8d3e09d9d53a85cd397b4b21b2c77be6',
|
||||
'timestamp': 1433772632,
|
||||
'upload_date': '20150608',
|
||||
'uploader': 'NAGS',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
},
|
||||
@@ -37,18 +47,67 @@ class NationalGeographicIE(InfoExtractor):
|
||||
name = url_basename(url)
|
||||
|
||||
webpage = self._download_webpage(url, name)
|
||||
feed_url = self._search_regex(
|
||||
r'data-feed-url="([^"]+)"', webpage, 'feed url')
|
||||
guid = self._search_regex(
|
||||
r'id="(?:videoPlayer|player-container)"[^>]+data-guid="([^"]+)"',
|
||||
webpage, 'guid')
|
||||
|
||||
feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name)
|
||||
content = feed.find('.//{http://search.yahoo.com/mrss/}content')
|
||||
theplatform_id = url_basename(content.attrib.get('url'))
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(
|
||||
'http://link.theplatform.com/s/ngs/media/guid/2423130747/%s?mbr=true' % guid,
|
||||
{'force_smil_url': True}),
|
||||
'id': guid,
|
||||
}
|
||||
|
||||
return self.url_result(smuggle_url(
|
||||
'http://link.theplatform.com/s/ngs/%s?formats=MPEG4&manifest=f4m' % theplatform_id,
|
||||
# For some reason, the normal links don't work and we must force
|
||||
# the use of f4m
|
||||
{'force_smil_url': True}))
|
||||
|
||||
class NationalGeographicChannelIE(InfoExtractor):
|
||||
IE_NAME = 'natgeo:channel'
|
||||
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/videos/(?P<id>[^/?]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
|
||||
'md5': '518c9aa655686cf81493af5cc21e2a04',
|
||||
'info_dict': {
|
||||
'id': 'nB5vIAfmyllm',
|
||||
'ext': 'mp4',
|
||||
'title': 'Uncovering a Universal Knowledge',
|
||||
'description': 'md5:1a89148475bf931b3661fcd6ddb2ae3a',
|
||||
'timestamp': 1458680907,
|
||||
'upload_date': '20160322',
|
||||
'uploader': 'NEWA-FNG-NGTV',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
},
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
|
||||
'md5': 'c4912f656b4cbe58f3e000c489360989',
|
||||
'info_dict': {
|
||||
'id': '3TmMv9OvGwIR',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Stunning Red Bird of Paradise',
|
||||
'description': 'md5:7bc8cd1da29686be4d17ad1230f0140c',
|
||||
'timestamp': 1459362152,
|
||||
'upload_date': '20160330',
|
||||
'uploader': 'NEWA-FNG-NGTV',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
release_url = self._search_regex(
|
||||
r'video_auth_playlist_url\s*=\s*"([^"]+)"',
|
||||
webpage, 'release url')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(
|
||||
update_url_query(release_url, {'mbr': 'true', 'switch': 'http'}),
|
||||
{'force_smil_url': True}),
|
||||
'display_id': display_id,
|
||||
}
|
||||
|
||||
@@ -27,6 +27,9 @@ class NBCIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
|
||||
'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
|
||||
'timestamp': 1424246400,
|
||||
'upload_date': '20150218',
|
||||
'uploader': 'NBCU-COM',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -50,6 +53,9 @@ class NBCIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Star Wars Teaser',
|
||||
'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
|
||||
'timestamp': 1417852800,
|
||||
'upload_date': '20141206',
|
||||
'uploader': 'NBCU-COM',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -78,6 +84,7 @@ class NBCIE(InfoExtractor):
|
||||
theplatform_url = 'http:' + theplatform_url
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(theplatform_url, {'source_url': url}),
|
||||
'id': video_id,
|
||||
}
|
||||
@@ -93,6 +100,9 @@ class NBCSportsVPlayerIE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
|
||||
'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
|
||||
'timestamp': 1426270238,
|
||||
'upload_date': '20150313',
|
||||
'uploader': 'NBCU-SPORTS',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
|
||||
@@ -134,6 +144,33 @@ class NBCSportsIE(InfoExtractor):
|
||||
NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
|
||||
|
||||
|
||||
class CSNNEIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.csnne\.com/video/(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.csnne.com/video/snc-evening-update-wright-named-red-sox-no-5-starter',
|
||||
'info_dict': {
|
||||
'id': 'yvBLLUgQ8WU0',
|
||||
'ext': 'mp4',
|
||||
'title': 'SNC evening update: Wright named Red Sox\' No. 5 starter.',
|
||||
'description': 'md5:1753cfee40d9352b19b4c9b3e589b9e3',
|
||||
'timestamp': 1459369979,
|
||||
'upload_date': '20160330',
|
||||
'uploader': 'NBCU-SPORTS',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': self._html_search_meta('twitter:player:stream', webpage),
|
||||
'display_id': display_id,
|
||||
}
|
||||
|
||||
|
||||
class NBCNewsIE(ThePlatformIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
|
||||
(?:video/.+?/(?P<id>\d+)|
|
||||
@@ -307,6 +344,7 @@ class MSNBCIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'timestamp': 1406937606,
|
||||
'upload_date': '20140802',
|
||||
'uploader': 'NBCU-NEWS',
|
||||
'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
|
||||
},
|
||||
}
|
||||
|
||||
@@ -27,17 +27,7 @@ class NovaMovIE(InfoExtractor):
|
||||
_DESCRIPTION_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>'
|
||||
_URL_TEMPLATE = 'http://%s/video/%s'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.novamov.com/video/4rurhn9x446jj',
|
||||
'md5': '7205f346a52bbeba427603ba10d4b935',
|
||||
'info_dict': {
|
||||
'id': '4rurhn9x446jj',
|
||||
'ext': 'flv',
|
||||
'title': 'search engine optimization',
|
||||
'description': 'search engine optimization is used to rank the web page in the google search engine'
|
||||
},
|
||||
'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)'
|
||||
}
|
||||
_TEST = None
|
||||
|
||||
def _check_existence(self, webpage, video_id):
|
||||
if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
|
||||
@@ -81,7 +71,7 @@ class NovaMovIE(InfoExtractor):
|
||||
|
||||
filekey = extract_filekey()
|
||||
|
||||
title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title', fatal=False)
|
||||
title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title')
|
||||
description = self._html_search_regex(self._DESCRIPTION_REGEX, webpage, 'description', default='', fatal=False)
|
||||
|
||||
api_response = self._download_webpage(
|
||||
@@ -187,3 +177,26 @@ class CloudTimeIE(NovaMovIE):
|
||||
_TITLE_REGEX = r'<div[^>]+class=["\']video_det["\'][^>]*>\s*<strong>([^<]+)</strong>'
|
||||
|
||||
_TEST = None
|
||||
|
||||
|
||||
class AuroraVidIE(NovaMovIE):
|
||||
IE_NAME = 'auroravid'
|
||||
IE_DESC = 'AuroraVid'
|
||||
|
||||
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'auroravid\.to'}
|
||||
|
||||
_HOST = 'www.auroravid.to'
|
||||
|
||||
_FILE_DELETED_REGEX = r'This file no longer exists on our servers!<'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.auroravid.to/video/4rurhn9x446jj',
|
||||
'md5': '7205f346a52bbeba427603ba10d4b935',
|
||||
'info_dict': {
|
||||
'id': '4rurhn9x446jj',
|
||||
'ext': 'flv',
|
||||
'title': 'search engine optimization',
|
||||
'description': 'search engine optimization is used to rank the web page in the google search engine'
|
||||
},
|
||||
'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)'
|
||||
}
|
||||
|
||||
@@ -63,8 +63,11 @@ class NownessIE(NownessBaseIE):
|
||||
'title': 'Candor: The Art of Gesticulation',
|
||||
'description': 'Candor: The Art of Gesticulation',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'uploader': 'Nowness',
|
||||
'timestamp': 1446745676,
|
||||
'upload_date': '20151105',
|
||||
'uploader_id': '2385340575001',
|
||||
},
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}, {
|
||||
'url': 'https://cn.nowness.com/story/kasper-bjorke-ft-jaakko-eino-kalevi-tnr',
|
||||
'md5': 'e79cf125e387216f86b2e0a5b5c63aa3',
|
||||
@@ -74,8 +77,11 @@ class NownessIE(NownessBaseIE):
|
||||
'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
|
||||
'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'uploader': 'Nowness',
|
||||
'timestamp': 1407315371,
|
||||
'upload_date': '20140806',
|
||||
'uploader_id': '2385340575001',
|
||||
},
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}, {
|
||||
# vimeo
|
||||
'url': 'https://www.nowness.com/series/nowness-picks/jean-luc-godard-supercut',
|
||||
@@ -90,6 +96,7 @@ class NownessIE(NownessBaseIE):
|
||||
'uploader': 'Cinema Sem Lei',
|
||||
'uploader_id': 'cinemasemlei',
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -63,6 +63,7 @@ class NRKIE(InfoExtractor):
|
||||
if determine_ext(media_url) == 'f4m':
|
||||
formats = self._extract_f4m_formats(
|
||||
media_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id, f4m_id='hds')
|
||||
self._sort_formats(formats)
|
||||
else:
|
||||
formats = [{
|
||||
'url': media_url,
|
||||
|
||||
@@ -64,8 +64,8 @@ class PluralsightIE(PluralsightBaseIE):
|
||||
login_form = self._hidden_inputs(login_page)
|
||||
|
||||
login_form.update({
|
||||
'Username': username.encode('utf-8'),
|
||||
'Password': password.encode('utf-8'),
|
||||
'Username': username,
|
||||
'Password': password,
|
||||
})
|
||||
|
||||
post_url = self._search_regex(
|
||||
@@ -279,13 +279,18 @@ class PluralsightCourseIE(PluralsightBaseIE):
|
||||
course_id, 'Downloading course data JSON')
|
||||
|
||||
entries = []
|
||||
for module in course_data:
|
||||
for num, module in enumerate(course_data, 1):
|
||||
for clip in module.get('clips', []):
|
||||
player_parameters = clip.get('playerParameters')
|
||||
if not player_parameters:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'%s/training/player?%s' % (self._API_BASE, player_parameters),
|
||||
'Pluralsight'))
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'url': '%s/training/player?%s' % (self._API_BASE, player_parameters),
|
||||
'ie_key': PluralsightIE.ie_key(),
|
||||
'chapter': module.get('title'),
|
||||
'chapter_number': num,
|
||||
'chapter_id': module.get('moduleRef'),
|
||||
})
|
||||
|
||||
return self.playlist_result(entries, course_id, title, description)
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import os
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
compat_urllib_parse_urlparse,
|
||||
@@ -12,6 +14,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
)
|
||||
@@ -75,7 +78,7 @@ class PornHubIE(InfoExtractor):
|
||||
|
||||
flashvars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+flashv1ars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
|
||||
r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
|
||||
video_id)
|
||||
if flashvars:
|
||||
video_title = flashvars.get('video_title')
|
||||
@@ -149,9 +152,12 @@ class PornHubIE(InfoExtractor):
|
||||
class PornHubPlaylistBaseIE(InfoExtractor):
|
||||
def _extract_entries(self, webpage):
|
||||
return [
|
||||
self.url_result('http://www.pornhub.com/%s' % video_url, PornHubIE.ie_key())
|
||||
for video_url in set(re.findall(
|
||||
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"', webpage))
|
||||
self.url_result(
|
||||
'http://www.pornhub.com/%s' % video_url,
|
||||
PornHubIE.ie_key(), video_title=title)
|
||||
for video_url, title in orderedSet(re.findall(
|
||||
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
|
||||
webpage))
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -185,16 +191,31 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
||||
class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?pornhub\.com/users/(?P<id>[^/]+)/videos'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/users/rushandlia/videos',
|
||||
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
|
||||
'info_dict': {
|
||||
'id': 'rushandlia',
|
||||
'id': 'zoe_ph',
|
||||
},
|
||||
'playlist_mincount': 13,
|
||||
'playlist_mincount': 171,
|
||||
}, {
|
||||
'url': 'http://www.pornhub.com/users/rushandlia/videos',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, user_id)
|
||||
entries = []
|
||||
for page_num in itertools.count(1):
|
||||
try:
|
||||
webpage = self._download_webpage(
|
||||
url, user_id, 'Downloading page %d' % page_num,
|
||||
query={'page': page_num})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||
break
|
||||
page_entries = self._extract_entries(webpage)
|
||||
if not page_entries:
|
||||
break
|
||||
entries.extend(page_entries)
|
||||
|
||||
return self.playlist_result(self._extract_entries(webpage), user_id)
|
||||
return self.playlist_result(entries, user_id)
|
||||
|
||||
@@ -31,6 +31,7 @@ class RestudyIE(InfoExtractor):
|
||||
formats = self._extract_smil_formats(
|
||||
'https://www.restudy.dk/awsmedia/SmilDirectory/video_%s.xml' % video_id,
|
||||
video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -39,9 +39,14 @@ class RteIE(InfoExtractor):
|
||||
duration = float_or_none(self._html_search_meta(
|
||||
'duration', webpage, 'duration', fatal=False), 1000)
|
||||
|
||||
thumbnail_id = self._search_regex(
|
||||
r'<meta name="thumbnail" content="uri:irus:(.*?)" />', webpage, 'thumbnail')
|
||||
thumbnail = 'http://img.rasset.ie/' + thumbnail_id + '.jpg'
|
||||
thumbnail = None
|
||||
thumbnail_meta = self._html_search_meta('thumbnail', webpage)
|
||||
if thumbnail_meta:
|
||||
thumbnail_id = self._search_regex(
|
||||
r'uri:irus:(.+)', thumbnail_meta,
|
||||
'thumbnail id', fatal=False)
|
||||
if thumbnail_id:
|
||||
thumbnail = 'http://img.rasset.ie/%s.jpg' % thumbnail_id
|
||||
|
||||
feeds_url = self._html_search_meta('feeds-prefix', webpage, 'feeds url') + video_id
|
||||
json_string = self._download_json(feeds_url, video_id)
|
||||
@@ -49,6 +54,7 @@ class RteIE(InfoExtractor):
|
||||
# f4m_url = server + relative_url
|
||||
f4m_url = json_string['shows'][0]['media:group'][0]['rte:server'] + json_string['shows'][0]['media:group'][0]['url']
|
||||
f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
|
||||
self._sort_formats(f4m_formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -209,6 +209,7 @@ class RTVELiveIE(InfoExtractor):
|
||||
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
||||
m3u8_url = _decrypt_url(png)
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -38,6 +38,7 @@ class RTVNHIE(InfoExtractor):
|
||||
item['file'], video_id, ext='mp4', entry_protocol='m3u8_native'))
|
||||
elif item.get('type') == '':
|
||||
formats.append({'url': item['file']})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -24,6 +24,9 @@ class SBSIE(InfoExtractor):
|
||||
'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'duration': 308,
|
||||
'timestamp': 1408613220,
|
||||
'upload_date': '20140821',
|
||||
'uploader': 'SBSC',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
|
||||
@@ -57,6 +60,7 @@ class SBSIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'id': video_id,
|
||||
'url': smuggle_url(theplatform_url, {'force_smil_url': True}),
|
||||
'url': smuggle_url(self._proto_relative_url(theplatform_url), {'force_smil_url': True}),
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ScreencastIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.screencast\.com/t/(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?screencast\.com/t/(?P<id>[a-zA-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.screencast.com/t/3ZEjQXlT',
|
||||
'md5': '917df1c13798a3e96211dd1561fded83',
|
||||
@@ -53,8 +53,10 @@ class ScreencastIE(InfoExtractor):
|
||||
'description': 'md5:7b9f393bc92af02326a5c5889639eab0',
|
||||
'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
|
||||
}
|
||||
},
|
||||
]
|
||||
}, {
|
||||
'url': 'http://screencast.com/t/aAB3iowa',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -94,8 +96,9 @@ class ScreencastIE(InfoExtractor):
|
||||
title = self._og_search_title(webpage, default=None)
|
||||
if title is None:
|
||||
title = self._html_search_regex(
|
||||
[r'<b>Title:</b> ([^<]*)</div>',
|
||||
r'class="tabSeperator">></span><span class="tabText">(.*?)<'],
|
||||
[r'<b>Title:</b> ([^<]+)</div>',
|
||||
r'class="tabSeperator">></span><span class="tabText">(.+?)<',
|
||||
r'<title>([^<]+)</title>'],
|
||||
webpage, 'title')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._og_search_description(webpage, default=None)
|
||||
|
||||
@@ -77,6 +77,7 @@ class ShahidIE(InfoExtractor):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
video = self._download_json(
|
||||
'%s/%s/%s?%s' % (
|
||||
|
||||
@@ -99,6 +99,7 @@ class SportBoxEmbedIE(InfoExtractor):
|
||||
webpage, 'hls file')
|
||||
|
||||
formats = self._extract_m3u8_formats(hls, video_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._search_regex(
|
||||
r'sportboxPlayer\.node_title\s*=\s*"([^"]+)"', webpage, 'title')
|
||||
|
||||
@@ -82,6 +82,7 @@ class TelecincoIE(InfoExtractor):
|
||||
)
|
||||
formats = self._extract_m3u8_formats(
|
||||
token_info['tokenizedUrl'], episode, ext='mp4', entry_protocol='m3u8_native')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': embed_data['videoId'],
|
||||
|
||||
@@ -1,90 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
class TenPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ten(play)?\.com\.au/.+'
|
||||
_TEST = {
|
||||
'url': 'http://tenplay.com.au/ten-insider/extra/season-2013/tenplay-tv-your-way',
|
||||
'info_dict': {
|
||||
'id': '2695695426001',
|
||||
'ext': 'flv',
|
||||
'title': 'TENplay: TV your way',
|
||||
'description': 'Welcome to a new TV experience. Enjoy a taste of the TENplay benefits.',
|
||||
'timestamp': 1380150606.889,
|
||||
'upload_date': '20130925',
|
||||
'uploader': 'TENplay',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Requires rtmpdump
|
||||
}
|
||||
}
|
||||
|
||||
_video_fields = [
|
||||
'id', 'name', 'shortDescription', 'longDescription', 'creationDate',
|
||||
'publishedDate', 'lastModifiedDate', 'customFields', 'videoStillURL',
|
||||
'thumbnailURL', 'referenceId', 'length', 'playsTotal',
|
||||
'playsTrailingWeek', 'renditions', 'captioning', 'startDate', 'endDate']
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, url)
|
||||
video_id = self._html_search_regex(
|
||||
r'videoID: "(\d+?)"', webpage, 'video_id')
|
||||
api_token = self._html_search_regex(
|
||||
r'apiToken: "([a-zA-Z0-9-_\.]+?)"', webpage, 'api_token')
|
||||
title = self._html_search_regex(
|
||||
r'<meta property="og:title" content="\s*(.*?)\s*"\s*/?\s*>',
|
||||
webpage, 'title')
|
||||
|
||||
json = self._download_json('https://api.brightcove.com/services/library?command=find_video_by_id&video_id=%s&token=%s&video_fields=%s' % (video_id, api_token, ','.join(self._video_fields)), title)
|
||||
|
||||
formats = []
|
||||
for rendition in json['renditions']:
|
||||
url = rendition['remoteUrl'] or rendition['url']
|
||||
protocol = 'rtmp' if url.startswith('rtmp') else 'http'
|
||||
ext = 'flv' if protocol == 'rtmp' else rendition['videoContainer'].lower()
|
||||
|
||||
if protocol == 'rtmp':
|
||||
url = url.replace('&mp4:', '')
|
||||
|
||||
tbr = int_or_none(rendition.get('encodingRate'), 1000)
|
||||
|
||||
formats.append({
|
||||
'format_id': '_'.join(
|
||||
['rtmp', rendition['videoContainer'].lower(),
|
||||
rendition['videoCodec'].lower(), '%sk' % tbr]),
|
||||
'width': int_or_none(rendition['frameWidth']),
|
||||
'height': int_or_none(rendition['frameHeight']),
|
||||
'tbr': tbr,
|
||||
'filesize': int_or_none(rendition['size']),
|
||||
'protocol': protocol,
|
||||
'ext': ext,
|
||||
'vcodec': rendition['videoCodec'].lower(),
|
||||
'container': rendition['videoContainer'].lower(),
|
||||
'url': url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': json['referenceId'],
|
||||
'title': json['name'],
|
||||
'description': json['shortDescription'] or json['longDescription'],
|
||||
'formats': formats,
|
||||
'thumbnails': [{
|
||||
'url': json['videoStillURL']
|
||||
}, {
|
||||
'url': json['thumbnailURL']
|
||||
}],
|
||||
'thumbnail': json['videoStillURL'],
|
||||
'duration': float_or_none(json.get('length'), 1000),
|
||||
'timestamp': float_or_none(json.get('creationDate'), 1000),
|
||||
'uploader': json.get('customFields', {}).get('production_company_distributor') or 'TENplay',
|
||||
'view_count': int_or_none(json.get('playsTotal')),
|
||||
}
|
||||
@@ -76,13 +76,15 @@ class ThePlatformBaseIE(OnceIE):
|
||||
'description': info['description'],
|
||||
'thumbnail': info['defaultThumbnailUrl'],
|
||||
'duration': int_or_none(info.get('duration'), 1000),
|
||||
'timestamp': int_or_none(info.get('pubDate'), 1000) or None,
|
||||
'uploader': info.get('billingCode'),
|
||||
}
|
||||
|
||||
|
||||
class ThePlatformIE(ThePlatformBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
|
||||
(?:(?P<media>(?:(?:[^/]+/)+select/)?media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
|
||||
(?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
|
||||
|theplatform:)(?P<id>[^/\?&]+)'''
|
||||
|
||||
_TESTS = [{
|
||||
@@ -94,6 +96,9 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
'title': 'Blackberry\'s big, bold Z30',
|
||||
'description': 'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
|
||||
'duration': 247,
|
||||
'timestamp': 1383239700,
|
||||
'upload_date': '20131031',
|
||||
'uploader': 'CBSI-NEW',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
@@ -107,6 +112,9 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
'ext': 'flv',
|
||||
'description': 'md5:ac330c9258c04f9d7512cf26b9595409',
|
||||
'title': 'Tesla Model S: A second step towards a cleaner motoring future',
|
||||
'timestamp': 1426176191,
|
||||
'upload_date': '20150312',
|
||||
'uploader': 'CBSI-NEW',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
@@ -119,6 +127,7 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:644ad9188d655b742f942bf2e06b002d',
|
||||
'title': 'HIGHLIGHTS: USA bag first ever series Cup win',
|
||||
'uploader': 'EGSM',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7',
|
||||
@@ -135,6 +144,7 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'timestamp': 1435752600,
|
||||
'upload_date': '20150701',
|
||||
'uploader': 'NBCU-NEWS',
|
||||
},
|
||||
}, {
|
||||
# From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1
|
||||
@@ -154,7 +164,7 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
def hex_to_str(hex):
|
||||
return binascii.a2b_hex(hex)
|
||||
|
||||
relative_path = url.split('http://link.theplatform.com/s/')[1].split('?')[0]
|
||||
relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1)
|
||||
clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path))
|
||||
checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
|
||||
sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
|
||||
@@ -170,10 +180,10 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
if not provider_id:
|
||||
provider_id = 'dJ5BDC'
|
||||
|
||||
path = provider_id
|
||||
path = provider_id + '/'
|
||||
if mobj.group('media'):
|
||||
path += '/media'
|
||||
path += '/' + video_id
|
||||
path += mobj.group('media')
|
||||
path += video_id
|
||||
|
||||
qs_dict = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
if 'guid' in qs_dict:
|
||||
|
||||
@@ -19,6 +19,10 @@ class TheStarIE(InfoExtractor):
|
||||
'uploader_id': '794267642001',
|
||||
'timestamp': 1454353482,
|
||||
'upload_date': '20160201',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/794267642001/default_default/index.html?videoId=%s'
|
||||
|
||||
@@ -69,6 +69,7 @@ class TubiTvIE(InfoExtractor):
|
||||
apu = self._search_regex(r"apu='([^']+)'", webpage, 'apu')
|
||||
m3u8_url = codecs.decode(apu, 'rot_13')[::-1]
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -5,6 +5,7 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
InAdvancePagedList,
|
||||
float_or_none,
|
||||
@@ -46,6 +47,19 @@ class TudouIE(InfoExtractor):
|
||||
|
||||
_PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf'
|
||||
|
||||
# Translated from tudou/tools/TVCHelper.as in PortalPlayer_193.swf
|
||||
# 0001, 0002 and 4001 are not included as they indicate temporary issues
|
||||
TVC_ERRORS = {
|
||||
'0003': 'The video is deleted or does not exist',
|
||||
'1001': 'This video is unavailable due to licensing issues',
|
||||
'1002': 'This video is unavailable as it\'s under review',
|
||||
'1003': 'This video is unavailable as it\'s under review',
|
||||
'3001': 'Password required',
|
||||
'5001': 'This video is available in Mainland China only due to licensing issues',
|
||||
'7001': 'This video is unavailable',
|
||||
'8001': 'This video is unavailable due to licensing issues',
|
||||
}
|
||||
|
||||
def _url_for_id(self, video_id, quality=None):
|
||||
info_url = 'http://v2.tudou.com/f?id=' + compat_str(video_id)
|
||||
if quality:
|
||||
@@ -63,6 +77,15 @@ class TudouIE(InfoExtractor):
|
||||
if youku_vcode:
|
||||
return self.url_result('youku:' + youku_vcode, ie='Youku')
|
||||
|
||||
if not item_data.get('itemSegs'):
|
||||
tvc_code = item_data.get('tvcCode')
|
||||
if tvc_code:
|
||||
err_msg = self.TVC_ERRORS.get(tvc_code)
|
||||
if err_msg:
|
||||
raise ExtractorError('Tudou said: %s' % err_msg, expected=True)
|
||||
raise ExtractorError('Unexpected error %s returned from Tudou' % tvc_code)
|
||||
raise ExtractorError('Unxpected error returned from Tudou')
|
||||
|
||||
title = unescapeHTML(item_data['kw'])
|
||||
description = item_data.get('desc')
|
||||
thumbnail_url = item_data.get('pic')
|
||||
|
||||
@@ -8,7 +8,7 @@ from ..utils import int_or_none
|
||||
|
||||
|
||||
class TumblrIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<blog_name>.*?)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])'
|
||||
_VALID_URL = r'https?://(?P<blog_name>[^/?#&]+)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])'
|
||||
_TESTS = [{
|
||||
'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
|
||||
'md5': '479bb068e5b16462f5176a6828829767',
|
||||
|
||||
@@ -21,6 +21,7 @@ class TV3IE(InfoExtractor):
|
||||
'Failed to download MPD manifest'
|
||||
],
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -102,6 +102,9 @@ class TwitterCardIE(TwitterBaseIE):
|
||||
r'data-(?:player-)?config="([^"]+)"', webpage, 'data player config'),
|
||||
video_id)
|
||||
|
||||
if config.get('source_type') == 'vine':
|
||||
return self.url_result(config['player_url'], 'Vine')
|
||||
|
||||
def _search_dimensions_in_video_url(a_format, video_url):
|
||||
m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
|
||||
if m:
|
||||
@@ -110,10 +113,9 @@ class TwitterCardIE(TwitterBaseIE):
|
||||
'height': int(m.group('height')),
|
||||
})
|
||||
|
||||
playlist = config.get('playlist')
|
||||
if playlist:
|
||||
video_url = playlist[0]['source']
|
||||
video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source')
|
||||
|
||||
if video_url:
|
||||
f = {
|
||||
'url': video_url,
|
||||
}
|
||||
@@ -185,7 +187,6 @@ class TwitterIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'duration': 12.922,
|
||||
'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"',
|
||||
'uploader': 'FREE THE NIPPLE',
|
||||
'uploader_id': 'freethenipple',
|
||||
@@ -247,6 +248,18 @@ class TwitterIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
}, {
|
||||
'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
|
||||
'md5': '89a15ed345d13b86e9a5a5e051fa308a',
|
||||
'info_dict': {
|
||||
'id': 'MIOxnrUteUd',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
|
||||
'uploader': 'TAKUMA',
|
||||
'uploader_id': '1004126642786242560',
|
||||
'upload_date': '20140615',
|
||||
},
|
||||
'add_ie': ['Vine'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
@@ -8,6 +10,8 @@ from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
@@ -50,22 +54,37 @@ class UdemyIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_course_info(self, webpage, video_id):
|
||||
course = self._parse_json(
|
||||
unescapeHTML(self._search_regex(
|
||||
r'ng-init=["\'].*\bcourse=({.+?});', webpage, 'course', default='{}')),
|
||||
video_id, fatal=False) or {}
|
||||
course_id = course.get('id') or self._search_regex(
|
||||
(r'"id"\s*:\s*(\d+)', r'data-course-id=["\'](\d+)'),
|
||||
webpage, 'course id')
|
||||
return course_id, course.get('title')
|
||||
|
||||
def _enroll_course(self, base_url, webpage, course_id):
|
||||
def combine_url(base_url, url):
|
||||
return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url
|
||||
|
||||
checkout_url = unescapeHTML(self._search_regex(
|
||||
r'href=(["\'])(?P<url>https?://(?:www\.)?udemy\.com/payment/checkout/.+?)\1',
|
||||
r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/payment/checkout/.+?)\1',
|
||||
webpage, 'checkout url', group='url', default=None))
|
||||
if checkout_url:
|
||||
raise ExtractorError(
|
||||
'Course %s is not free. You have to pay for it before you can download. '
|
||||
'Use this URL to confirm purchase: %s' % (course_id, checkout_url), expected=True)
|
||||
'Use this URL to confirm purchase: %s'
|
||||
% (course_id, combine_url(base_url, checkout_url)),
|
||||
expected=True)
|
||||
|
||||
enroll_url = unescapeHTML(self._search_regex(
|
||||
r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/course/subscribe/.+?)\1',
|
||||
webpage, 'enroll url', group='url', default=None))
|
||||
if enroll_url:
|
||||
if not enroll_url.startswith('http'):
|
||||
enroll_url = compat_urlparse.urljoin(base_url, enroll_url)
|
||||
webpage = self._download_webpage(enroll_url, course_id, 'Enrolling in the course')
|
||||
webpage = self._download_webpage(
|
||||
combine_url(base_url, enroll_url),
|
||||
course_id, 'Enrolling in the course')
|
||||
if '>You have enrolled in' in webpage:
|
||||
self.to_screen('%s: Successfully enrolled in the course' % course_id)
|
||||
|
||||
@@ -73,11 +92,8 @@ class UdemyIE(InfoExtractor):
|
||||
return self._download_json(
|
||||
'https://www.udemy.com/api-2.0/users/me/subscribed-courses/%s/lectures/%s?%s' % (
|
||||
course_id, lecture_id, compat_urllib_parse_urlencode({
|
||||
'video_only': '',
|
||||
'auto_play': '',
|
||||
'fields[lecture]': 'title,description,asset',
|
||||
'fields[lecture]': 'title,description,view_html,asset',
|
||||
'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,data',
|
||||
'instructorPreviewMode': 'False',
|
||||
})),
|
||||
lecture_id, 'Downloading lecture JSON')
|
||||
|
||||
@@ -92,7 +108,7 @@ class UdemyIE(InfoExtractor):
|
||||
error_str += ' - %s' % error_data.get('formErrors')
|
||||
raise ExtractorError(error_str, expected=True)
|
||||
|
||||
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
|
||||
def _download_json(self, url_or_request, *args, **kwargs):
|
||||
headers = {
|
||||
'X-Udemy-Snail-Case': 'true',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
@@ -110,7 +126,7 @@ class UdemyIE(InfoExtractor):
|
||||
else:
|
||||
url_or_request = sanitized_Request(url_or_request, headers=headers)
|
||||
|
||||
response = super(UdemyIE, self)._download_json(url_or_request, video_id, note)
|
||||
response = super(UdemyIE, self)._download_json(url_or_request, *args, **kwargs)
|
||||
self._handle_error(response)
|
||||
return response
|
||||
|
||||
@@ -135,8 +151,8 @@ class UdemyIE(InfoExtractor):
|
||||
login_form = self._form_hidden_inputs('login-form', login_popup)
|
||||
|
||||
login_form.update({
|
||||
'email': username.encode('utf-8'),
|
||||
'password': password.encode('utf-8'),
|
||||
'email': username,
|
||||
'password': password,
|
||||
})
|
||||
|
||||
request = sanitized_Request(
|
||||
@@ -160,9 +176,7 @@ class UdemyIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, lecture_id)
|
||||
|
||||
course_id = self._search_regex(
|
||||
(r'data-course-id=["\'](\d+)', r'"id"\s*:\s*(\d+)'),
|
||||
webpage, 'course id')
|
||||
course_id, _ = self._extract_course_info(webpage, lecture_id)
|
||||
|
||||
try:
|
||||
lecture = self._download_lecture(course_id, lecture_id)
|
||||
@@ -179,12 +193,12 @@ class UdemyIE(InfoExtractor):
|
||||
|
||||
asset = lecture['asset']
|
||||
|
||||
asset_type = asset.get('assetType') or asset.get('asset_type')
|
||||
asset_type = asset.get('asset_type') or asset.get('assetType')
|
||||
if asset_type != 'Video':
|
||||
raise ExtractorError(
|
||||
'Lecture %s is not a video' % lecture_id, expected=True)
|
||||
|
||||
stream_url = asset.get('streamUrl') or asset.get('stream_url')
|
||||
stream_url = asset.get('stream_url') or asset.get('streamUrl')
|
||||
if stream_url:
|
||||
youtube_url = self._search_regex(
|
||||
r'(https?://www\.youtube\.com/watch\?v=.*)', stream_url, 'youtube URL', default=None)
|
||||
@@ -192,7 +206,7 @@ class UdemyIE(InfoExtractor):
|
||||
return self.url_result(youtube_url, 'Youtube')
|
||||
|
||||
video_id = asset['id']
|
||||
thumbnail = asset.get('thumbnailUrl') or asset.get('thumbnail_url')
|
||||
thumbnail = asset.get('thumbnail_url') or asset.get('thumbnailUrl')
|
||||
duration = float_or_none(asset.get('data', {}).get('duration'))
|
||||
|
||||
formats = []
|
||||
@@ -200,7 +214,7 @@ class UdemyIE(InfoExtractor):
|
||||
def extract_output_format(src):
|
||||
return {
|
||||
'url': src['url'],
|
||||
'format_id': '%sp' % (src.get('label') or format_id),
|
||||
'format_id': '%sp' % (src.get('height') or format_id),
|
||||
'width': int_or_none(src.get('width')),
|
||||
'height': int_or_none(src.get('height')),
|
||||
'vbr': int_or_none(src.get('video_bitrate_in_kbps')),
|
||||
@@ -217,9 +231,13 @@ class UdemyIE(InfoExtractor):
|
||||
if not isinstance(outputs, dict):
|
||||
outputs = {}
|
||||
|
||||
for format_id, output in outputs.items():
|
||||
if isinstance(output, dict) and output.get('url'):
|
||||
formats.append(extract_output_format(output))
|
||||
def add_output_format_meta(f, key):
|
||||
output = outputs.get(key)
|
||||
if isinstance(output, dict):
|
||||
output_format = extract_output_format(output)
|
||||
output_format.update(f)
|
||||
return output_format
|
||||
return f
|
||||
|
||||
download_urls = asset.get('download_urls')
|
||||
if isinstance(download_urls, dict):
|
||||
@@ -232,21 +250,48 @@ class UdemyIE(InfoExtractor):
|
||||
format_id = format_.get('label')
|
||||
f = {
|
||||
'url': format_['file'],
|
||||
'format_id': '%sp' % format_id,
|
||||
'height': int_or_none(format_id),
|
||||
}
|
||||
if format_id:
|
||||
# Some videos contain additional metadata (e.g.
|
||||
# https://www.udemy.com/ios9-swift/learn/#/lecture/3383208)
|
||||
output = outputs.get(format_id)
|
||||
if isinstance(output, dict):
|
||||
output_format = extract_output_format(output)
|
||||
output_format.update(f)
|
||||
f = output_format
|
||||
else:
|
||||
f['format_id'] = '%sp' % format_id
|
||||
f = add_output_format_meta(f, format_id)
|
||||
formats.append(f)
|
||||
|
||||
self._sort_formats(formats)
|
||||
view_html = lecture.get('view_html')
|
||||
if view_html:
|
||||
view_html_urls = set()
|
||||
for source in re.findall(r'<source[^>]+>', view_html):
|
||||
attributes = extract_attributes(source)
|
||||
src = attributes.get('src')
|
||||
if not src:
|
||||
continue
|
||||
res = attributes.get('data-res')
|
||||
height = int_or_none(res)
|
||||
if src in view_html_urls:
|
||||
continue
|
||||
view_html_urls.add(src)
|
||||
if attributes.get('type') == 'application/x-mpegURL' or determine_ext(src) == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
for f in m3u8_formats:
|
||||
m = re.search(r'/hls_(?P<height>\d{3,4})_(?P<tbr>\d{2,})/', f['url'])
|
||||
if m:
|
||||
if not f.get('height'):
|
||||
f['height'] = int(m.group('height'))
|
||||
if not f.get('tbr'):
|
||||
f['tbr'] = int(m.group('tbr'))
|
||||
formats.extend(m3u8_formats)
|
||||
else:
|
||||
formats.append(add_output_format_meta({
|
||||
'url': src,
|
||||
'format_id': '%dp' % height if height else None,
|
||||
'height': height,
|
||||
}, res))
|
||||
|
||||
self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -260,7 +305,7 @@ class UdemyIE(InfoExtractor):
|
||||
|
||||
class UdemyCourseIE(UdemyIE):
|
||||
IE_NAME = 'udemy:course'
|
||||
_VALID_URL = r'https?://www\.udemy\.com/(?P<id>[\da-z-]+)'
|
||||
_VALID_URL = r'https?://www\.udemy\.com/(?P<id>[^/?#&]+)'
|
||||
_TESTS = []
|
||||
|
||||
@classmethod
|
||||
@@ -272,29 +317,34 @@ class UdemyCourseIE(UdemyIE):
|
||||
|
||||
webpage = self._download_webpage(url, course_path)
|
||||
|
||||
response = self._download_json(
|
||||
'https://www.udemy.com/api-1.1/courses/%s' % course_path,
|
||||
course_path, 'Downloading course JSON')
|
||||
|
||||
course_id = response['id']
|
||||
course_title = response.get('title')
|
||||
course_id, title = self._extract_course_info(webpage, course_path)
|
||||
|
||||
self._enroll_course(url, webpage, course_id)
|
||||
|
||||
response = self._download_json(
|
||||
'https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,
|
||||
course_id, 'Downloading course curriculum')
|
||||
'https://www.udemy.com/api-2.0/courses/%s/cached-subscriber-curriculum-items' % course_id,
|
||||
course_id, 'Downloading course curriculum', query={
|
||||
'fields[chapter]': 'title,object_index',
|
||||
'fields[lecture]': 'title,asset',
|
||||
'page_size': '1000',
|
||||
})
|
||||
|
||||
entries = []
|
||||
chapter, chapter_number = None, None
|
||||
for asset in response:
|
||||
asset_type = asset.get('assetType') or asset.get('asset_type')
|
||||
if asset_type == 'Video':
|
||||
asset_id = asset.get('id')
|
||||
if asset_id:
|
||||
chapter, chapter_number = [None] * 2
|
||||
for entry in response['results']:
|
||||
clazz = entry.get('_class')
|
||||
if clazz == 'lecture':
|
||||
asset = entry.get('asset')
|
||||
if isinstance(asset, dict):
|
||||
asset_type = asset.get('asset_type') or asset.get('assetType')
|
||||
if asset_type != 'Video':
|
||||
continue
|
||||
lecture_id = entry.get('id')
|
||||
if lecture_id:
|
||||
entry = {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']),
|
||||
'url': 'https://www.udemy.com/%s/learn/v4/t/lecture/%s' % (course_path, entry['id']),
|
||||
'title': entry.get('title'),
|
||||
'ie_key': UdemyIE.ie_key(),
|
||||
}
|
||||
if chapter_number:
|
||||
@@ -302,8 +352,8 @@ class UdemyCourseIE(UdemyIE):
|
||||
if chapter:
|
||||
entry['chapter'] = chapter
|
||||
entries.append(entry)
|
||||
elif asset.get('type') == 'chapter':
|
||||
chapter_number = asset.get('index') or asset.get('object_index')
|
||||
chapter = asset.get('title')
|
||||
elif clazz == 'chapter':
|
||||
chapter_number = entry.get('object_index')
|
||||
chapter = entry.get('title')
|
||||
|
||||
return self.playlist_result(entries, course_id, course_title)
|
||||
return self.playlist_result(entries, course_id, title)
|
||||
|
||||
@@ -152,7 +152,7 @@ class VevoIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
|
||||
json_url = 'http://api.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
|
||||
response = self._download_json(
|
||||
json_url, video_id, 'Downloading video info', 'Unable to download info')
|
||||
video_info = response.get('video') or {}
|
||||
|
||||
@@ -111,6 +111,7 @@ class VideomoreIE(InfoExtractor):
|
||||
|
||||
video_url = xpath_text(video, './/video_url', 'video url', fatal=True)
|
||||
formats = self._extract_f4m_formats(video_url, video_id, f4m_id='hds')
|
||||
self._sort_formats(formats)
|
||||
|
||||
data = self._download_json(
|
||||
'http://videomore.ru/video/tracks/%s.json' % video_id,
|
||||
|
||||
@@ -50,6 +50,7 @@ class VierIE(InfoExtractor):
|
||||
|
||||
playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename)
|
||||
formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._og_search_title(webpage, default=display_id)
|
||||
description = self._og_search_description(webpage, default=None)
|
||||
|
||||
@@ -151,6 +151,7 @@ class ViideaIE(InfoExtractor):
|
||||
smil_url = '%s/%s/video/%s/smil.xml' % (base_url, lecture_slug, part_id)
|
||||
smil = self._download_smil(smil_url, lecture_id)
|
||||
info = self._parse_smil(smil, smil_url, lecture_id)
|
||||
self._sort_formats(info['formats'])
|
||||
info['id'] = lecture_id if not multipart else '%s_part%s' % (lecture_id, part_id)
|
||||
info['display_id'] = lecture_slug if not multipart else '%s_part%s' % (lecture_slug, part_id)
|
||||
if multipart:
|
||||
|
||||
132
youtube_dl/extractor/voxmedia.py
Normal file
132
youtube_dl/extractor/voxmedia.py
Normal file
@@ -0,0 +1,132 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
|
||||
|
||||
class VoxMediaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com/(?:[^/]+/)*(?P<id>[^/?]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of',
|
||||
'md5': '73856edf3e89a711e70d5cf7cb280b37',
|
||||
'info_dict': {
|
||||
'id': '11eXZobjrG8DCSTgrNjVinU-YmmdYjhe',
|
||||
'ext': 'mp4',
|
||||
'title': 'Google\'s new material design direction',
|
||||
'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2',
|
||||
}
|
||||
}, {
|
||||
# data-ooyala-id
|
||||
'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
|
||||
'md5': 'd744484ff127884cd2ba09e3fa604e4b',
|
||||
'info_dict': {
|
||||
'id': 'RkZXU4cTphOCPDMZg5oEounJyoFI0g-B',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Nexus 6: hands-on with Google\'s phablet',
|
||||
'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af',
|
||||
}
|
||||
}, {
|
||||
# volume embed
|
||||
'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
|
||||
'md5': '375c483c5080ab8cd85c9c84cfc2d1e4',
|
||||
'info_dict': {
|
||||
'id': 'wydzk3dDpmRz7PQoXRsTIX6XTkPjYL0b',
|
||||
'ext': 'mp4',
|
||||
'title': 'The new frontier of LGBTQ civil rights, explained',
|
||||
'description': 'md5:0dc58e94a465cbe91d02950f770eb93f',
|
||||
}
|
||||
}, {
|
||||
# youtube embed
|
||||
'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance',
|
||||
'md5': '83b3080489fb103941e549352d3e0977',
|
||||
'info_dict': {
|
||||
'id': 'FcNHTJU1ufM',
|
||||
'ext': 'mp4',
|
||||
'title': 'How "the robot" became the greatest novelty dance of all time',
|
||||
'description': 'md5:b081c0d588b8b2085870cda55e6da176',
|
||||
'upload_date': '20160324',
|
||||
'uploader_id': 'voxdotcom',
|
||||
'uploader': 'Vox',
|
||||
}
|
||||
}, {
|
||||
# SBN.VideoLinkset.entryGroup multiple ooyala embeds
|
||||
'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
|
||||
'info_dict': {
|
||||
'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
|
||||
'title': '25 lies you will tell yourself on National Signing Day',
|
||||
'description': 'It\'s the most self-delusional time of the year, and everyone\'s gonna tell the same lies together!',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '721fededf2ab74ae4176c8c8cbfe092e',
|
||||
'info_dict': {
|
||||
'id': 'p3cThlMjE61VDi_SD9JlIteSNPWVDBB9',
|
||||
'ext': 'mp4',
|
||||
'title': 'Buddy Hield vs Steph Curry (and the world)',
|
||||
'description': 'Let’s dissect only the most important Final Four storylines.',
|
||||
},
|
||||
}, {
|
||||
'md5': 'bf0c5cc115636af028be1bab79217ea9',
|
||||
'info_dict': {
|
||||
'id': 'BmbmVjMjE6esPHxdALGubTrouQ0jYLHj',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chasing Cinderella 2016: Syracuse basketball',
|
||||
'description': 'md5:e02d56b026d51aa32c010676765a690d',
|
||||
},
|
||||
}],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = compat_urllib_parse_unquote(self._download_webpage(url, display_id))
|
||||
|
||||
def create_entry(provider_video_id, provider_video_type, title=None, description=None):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': provider_video_id if provider_video_type == 'youtube' else '%s:%s' % (provider_video_type, provider_video_id),
|
||||
'title': title or self._og_search_title(webpage),
|
||||
'description': description or self._og_search_description(webpage),
|
||||
}
|
||||
|
||||
entries = []
|
||||
entries_data = self._search_regex([
|
||||
r'Chorus\.VideoContext\.addVideo\((\[{.+}\])\);',
|
||||
r'var\s+entry\s*=\s*({.+});',
|
||||
r'SBN\.VideoLinkset\.entryGroup\(\s*(\[.+\])',
|
||||
], webpage, 'video data', default=None)
|
||||
if entries_data:
|
||||
entries_data = self._parse_json(entries_data, display_id)
|
||||
if isinstance(entries_data, dict):
|
||||
entries_data = [entries_data]
|
||||
for video_data in entries_data:
|
||||
provider_video_id = video_data.get('provider_video_id')
|
||||
provider_video_type = video_data.get('provider_video_type')
|
||||
if provider_video_id and provider_video_type:
|
||||
entries.append(create_entry(
|
||||
provider_video_id, provider_video_type,
|
||||
video_data.get('title'), video_data.get('description')))
|
||||
|
||||
provider_video_id = self._search_regex(
|
||||
r'data-ooyala-id="([^"]+)"', webpage, 'ooyala id', default=None)
|
||||
if provider_video_id:
|
||||
entries.append(create_entry(provider_video_id, 'ooyala'))
|
||||
|
||||
volume_uuid = self._search_regex(
|
||||
r'data-volume-uuid="([^"]+)"', webpage, 'volume uuid', default=None)
|
||||
if volume_uuid:
|
||||
volume_webpage = self._download_webpage(
|
||||
'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid)
|
||||
video_data = self._parse_json(self._search_regex(
|
||||
r'Volume\.createVideo\(({.+})\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid)
|
||||
for provider_video_type in ('ooyala', 'youtube'):
|
||||
provider_video_id = video_data.get('%s_id' % provider_video_type)
|
||||
if provider_video_id:
|
||||
description = video_data.get('description_long') or video_data.get('description_short')
|
||||
entries.append(create_entry(
|
||||
provider_video_id, provider_video_type, video_data.get('title_short'), description))
|
||||
break
|
||||
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
else:
|
||||
return self.playlist_result(entries, display_id, self._og_search_title(webpage), self._og_search_description(webpage))
|
||||
@@ -41,10 +41,12 @@ class YnetIE(InfoExtractor):
|
||||
m = re.search(r'ynet - HOT -- (["\']+)(?P<title>.+?)\1', title)
|
||||
if m:
|
||||
title = m.group('title')
|
||||
formats = self._extract_f4m_formats(f4m_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': self._extract_f4m_formats(f4m_url, video_id),
|
||||
'formats': formats,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
||||
@@ -234,7 +234,9 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
|
||||
|
||||
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
|
||||
def _process_page(self, content):
|
||||
for playlist_id in orderedSet(re.findall(r'href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)):
|
||||
for playlist_id in orderedSet(re.findall(
|
||||
r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
|
||||
content)):
|
||||
yield self.url_result(
|
||||
'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
|
||||
|
||||
@@ -268,7 +270,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
))
|
||||
|(?:
|
||||
youtu\.be| # just youtu.be/xxxx
|
||||
vid\.plus # or vid.plus/xxxx
|
||||
vid\.plus| # or vid.plus/xxxx
|
||||
zwearz\.com/watch| # or zwearz.com/watch/xxxx
|
||||
)/
|
||||
|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
|
||||
)
|
||||
@@ -756,6 +759,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'url': 'http://vid.plus/FlRa-iH7PGw',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
|
||||
# Also tests cut-off URL expansion in video description (see
|
||||
|
||||
@@ -85,6 +85,13 @@ class ZDFIE(InfoExtractor):
|
||||
uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader')
|
||||
uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id')
|
||||
upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date'))
|
||||
subtitles = {}
|
||||
captions_url = doc.find('.//caption/url')
|
||||
if captions_url is not None:
|
||||
subtitles['de'] = [{
|
||||
'url': captions_url.text,
|
||||
'ext': 'ttml',
|
||||
}]
|
||||
|
||||
def xml_to_thumbnails(fnode):
|
||||
thumbnails = []
|
||||
@@ -190,6 +197,7 @@ class ZDFIE(InfoExtractor):
|
||||
'uploader_id': uploader_id,
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -536,7 +536,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
||||
sub_filenames.append(old_file)
|
||||
new_file = subtitles_filename(filename, lang, new_ext)
|
||||
|
||||
if ext == 'dfxp' or ext == 'ttml':
|
||||
if ext == 'dfxp' or ext == 'ttml' or ext == 'tt':
|
||||
self._downloader.report_warning(
|
||||
'You have requested to convert dfxp (TTML) subtitles into another format, '
|
||||
'which results in style information loss')
|
||||
|
||||
@@ -417,9 +417,12 @@ def sanitize_path(s):
|
||||
|
||||
# Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of
|
||||
# unwanted failures due to missing protocol
|
||||
def sanitize_url(url):
|
||||
return 'http:%s' % url if url.startswith('//') else url
|
||||
|
||||
|
||||
def sanitized_Request(url, *args, **kwargs):
|
||||
return compat_urllib_request.Request(
|
||||
'http:%s' % url if url.startswith('//') else url, *args, **kwargs)
|
||||
return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
|
||||
|
||||
|
||||
def orderedSet(iterable):
|
||||
@@ -775,12 +778,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
||||
|
||||
# Substitute URL if any change after escaping
|
||||
if url != url_escaped:
|
||||
req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
|
||||
new_req = req_type(
|
||||
url_escaped, data=req.data, headers=req.headers,
|
||||
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
|
||||
new_req.timeout = req.timeout
|
||||
req = new_req
|
||||
req = update_Request(req, url=url_escaped)
|
||||
|
||||
for h, v in std_headers.items():
|
||||
# Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
|
||||
@@ -1801,6 +1799,20 @@ def update_url_query(url, query):
|
||||
query=compat_urllib_parse_urlencode(qs, True)))
|
||||
|
||||
|
||||
def update_Request(req, url=None, data=None, headers={}, query={}):
|
||||
req_headers = req.headers.copy()
|
||||
req_headers.update(headers)
|
||||
req_data = data or req.data
|
||||
req_url = update_url_query(url or req.get_full_url(), query)
|
||||
req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
|
||||
new_req = req_type(
|
||||
req_url, data=req_data, headers=req_headers,
|
||||
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
|
||||
if hasattr(req, 'timeout'):
|
||||
new_req.timeout = req.timeout
|
||||
return new_req
|
||||
|
||||
|
||||
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
|
||||
if isinstance(key_or_keys, (list, tuple)):
|
||||
for key in key_or_keys:
|
||||
@@ -2119,6 +2131,7 @@ def dfxp2srt(dfxp_data):
|
||||
_x = functools.partial(xpath_with_ns, ns_map={
|
||||
'ttml': 'http://www.w3.org/ns/ttml',
|
||||
'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
|
||||
'ttaf1_0604': 'http://www.w3.org/2006/04/ttaf1',
|
||||
})
|
||||
|
||||
class TTMLPElementParser(object):
|
||||
@@ -2145,7 +2158,7 @@ def dfxp2srt(dfxp_data):
|
||||
|
||||
dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
|
||||
out = []
|
||||
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')
|
||||
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall(_x('.//ttaf1_0604:p')) or dfxp.findall('.//p')
|
||||
|
||||
if not paras:
|
||||
raise ValueError('Invalid dfxp/TTML subtitle')
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.03.26'
|
||||
__version__ = '2016.04.06'
|
||||
|
||||
Reference in New Issue
Block a user