[afreecatv] Fix typo (#26970 )

[23video] Relax _VALID_URL (#26870 )
[utils] Don't attempt to coerce JS strings to numbers in js_to_json (#26851 )
2026-01-24 00:00:10 -05:00 · 2020-10-22 19:15:05 +07:00 · 2020-10-20 00:56:23 +07:00 · 2020-10-18 00:10:41 +07:00 · 2020-10-17 23:14:46 +07:00 · 2020-10-17 23:02:17 +07:00
10 changed files with 39 additions and 22 deletions
--- a/README.md
+++ b/README.md
@@ -545,7 +545,7 @@ The basic usage is not to set any template arguments when downloading a single f
 - `extractor` (string): Name of the extractor
 - `extractor_key` (string): Key name of the extractor
 - `epoch` (numeric): Unix epoch when creating the file
- - `autonumber` (numeric): Five-digit number that will be increased with each download, starting at zero
+ - `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`
 - `playlist` (string): Name or id of the playlist that contains the video
 - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist
 - `playlist_id` (string): Playlist identifier
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -994,6 +994,12 @@ class TestUtil(unittest.TestCase):
        on = js_to_json('{42:4.2e1}')
        self.assertEqual(json.loads(on), {'42': 42.0})

+        on = js_to_json('{ "0x40": "0x40" }')
+        self.assertEqual(json.loads(on), {'0x40': '0x40'})
+
+        on = js_to_json('{ "040": "040" }')
+        self.assertEqual(json.loads(on), {'040': '040'})
+
    def test_js_to_json_malformed(self):
        self.assertEqual(js_to_json('42a1'), '42"a1"')
        self.assertEqual(js_to_json('42a-1'), '42"a"-1')
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -223,9 +223,10 @@ class HttpFD(FileDownloader):

            def retry(e):
                to_stdout = ctx.tmpfilename == '-'
-                if not to_stdout:
-                    ctx.stream.close()
-                ctx.stream = None
+                if ctx.stream is not None:
+                    if not to_stdout:
+                        ctx.stream.close()
+                    ctx.stream = None
                ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
                raise RetryDownload(e)

@@ -240,7 +241,7 @@ class HttpFD(FileDownloader):
                except socket.error as e:
                    # SSLError on python 2 (inherits socket.error) may have
                    # no errno set but this error message
-                    if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message') == 'The read operation timed out':
+                    if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message', None) == 'The read operation timed out':
                        retry(e)
                    raise

--- a/youtube_dl/extractor/afreecatv.py
+++ b/youtube_dl/extractor/afreecatv.py
@@ -275,7 +275,7 @@ class AfreecaTVIE(InfoExtractor):
        video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
        if video_element is None or video_element.text is None:
            raise ExtractorError(
-                'Video %s video does not exist' % video_id, expected=True)
+                'Video %s does not exist' % video_id, expected=True)

        video_url = video_element.text.strip()

--- a/youtube_dl/extractor/expressen.py
+++ b/youtube_dl/extractor/expressen.py
@@ -15,7 +15,7 @@ from ..utils import (
 class ExpressenIE(InfoExtractor):
    _VALID_URL = r'''(?x)
                    https?://
-                        (?:www\.)?expressen\.se/
+                        (?:www\.)?(?:expressen|di)\.se/
                        (?:(?:tvspelare/video|videoplayer/embed)/)?
                        tv/(?:[^/]+/)*
                        (?P<id>[^/?#&]+)
@@ -42,13 +42,16 @@ class ExpressenIE(InfoExtractor):
    }, {
        'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
        'only_matching': True,
+    }, {
+        'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
+        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        return [
            mobj.group('url') for mobj in re.finditer(
-                r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?expressen\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
+                r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
                webpage)]

    def _real_extract(self, url):
--- a/youtube_dl/extractor/iprima.py
+++ b/youtube_dl/extractor/iprima.py
@@ -86,7 +86,8 @@ class IPrimaIE(InfoExtractor):
            (r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
             r'data-product="([^"]+)">',
             r'id=["\']player-(p\d+)"',
-             r'playerId\s*:\s*["\']player-(p\d+)'),
+             r'playerId\s*:\s*["\']player-(p\d+)',
+             r'\bvideos\s*=\s*["\'](p\d+)'),
            webpage, 'real id')

        playerpage = self._download_webpage(
--- a/youtube_dl/extractor/iqiyi.py
+++ b/youtube_dl/extractor/iqiyi.py
@@ -150,7 +150,7 @@ class IqiyiSDKInterpreter(object):
            elif function in other_functions:
                other_functions[function]()
            else:
-                raise ExtractorError('Unknown funcion %s' % function)
+                raise ExtractorError('Unknown function %s' % function)

        return sdk.target

--- a/youtube_dl/extractor/twentythreevideo.py
+++ b/youtube_dl/extractor/twentythreevideo.py
@@ -8,8 +8,8 @@ from ..utils import int_or_none

 class TwentyThreeVideoIE(InfoExtractor):
    IE_NAME = '23video'
-    _VALID_URL = r'https?://video\.(?P<domain>twentythree\.net|23video\.com|filmweb\.no)/v\.ihtml/player\.html\?(?P<query>.*?\bphoto(?:_|%5f)id=(?P<id>\d+).*)'
-    _TEST = {
+    _VALID_URL = r'https?://(?P<domain>[^.]+\.(?:twentythree\.net|23video\.com|filmweb\.no))/v\.ihtml/player\.html\?(?P<query>.*?\bphoto(?:_|%5f)id=(?P<id>\d+).*)'
+    _TESTS = [{
        'url': 'https://video.twentythree.net/v.ihtml/player.html?showDescriptions=0&source=site&photo%5fid=20448876&autoPlay=1',
        'md5': '75fcf216303eb1dae9920d651f85ced4',
        'info_dict': {
@@ -21,11 +21,14 @@ class TwentyThreeVideoIE(InfoExtractor):
            'uploader_id': '12258964',
            'uploader': 'Rasmus Bysted',
        }
-    }
+    }, {
+        'url': 'https://bonnier-publications-danmark.23video.com/v.ihtml/player.html?token=f0dc46476e06e13afd5a1f84a29e31e8&source=embed&photo%5fid=36137620',
+        'only_matching': True,
+    }]

    def _real_extract(self, url):
        domain, query, photo_id = re.match(self._VALID_URL, url).groups()
-        base_url = 'https://video.%s' % domain
+        base_url = 'https://%s' % domain
        photo_data = self._download_json(
            base_url + '/api/photo/list?' + query, photo_id, query={
                'format': 'json',
--- a/youtube_dl/extractor/ustream.py
+++ b/youtube_dl/extractor/ustream.py
@@ -19,7 +19,7 @@ from ..utils import (


 class UstreamIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
    IE_NAME = 'ustream'
    _TESTS = [{
        'url': 'http://www.ustream.tv/recorded/20274954',
@@ -67,12 +67,15 @@ class UstreamIE(InfoExtractor):
        'params': {
            'skip_download': True,  # m3u8 download
        },
+    }, {
+        'url': 'https://video.ibm.com/embed/recorded/128240221?&autoplay=true&controls=true&volume=100',
+        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
+            r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/embed/.+?)\1', webpage)
        if mobj is not None:
            return mobj.group('url')

--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -4088,12 +4088,12 @@ def js_to_json(code):
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
-
-        for regex, base in INTEGER_TABLE:
-            im = re.match(regex, v)
-            if im:
-                i = int(im.group(1), base)
-                return '"%d":' % i if v.endswith(':') else '%d' % i
+        else:
+            for regex, base in INTEGER_TABLE:
+                im = re.match(regex, v)
+                if im:
+                    i = int(im.group(1), base)
+                    return '"%d":' % i if v.endswith(':') else '%d' % i

        return '"%s"' % v
Author	SHA1	Message	Date
Toan Nguyen	48c5663c5f	[afreecatv] Fix typo (#26970 )	2020-10-22 19:15:05 +07:00
Hannu Hartikainen	7d740e7dc7	[23video] Relax _VALID_URL (#26870 )	2020-10-20 00:56:23 +07:00
Kevin O'Connor	4eda10499e	[utils] Don't attempt to coerce JS strings to numbers in js_to_json (#26851 ) The current logic in `js_to_json` tries to rewrite octal/hex numbers to decimal. However, when the logic actually happens the `"` or `'` have already been trimmed off. This causes what were originally strings, that happen to look like octal/hex numbers, to get rewritten to decimal and returned as a number rather than a string. In practive something like: ```js { "0x40": "foo", "040": "bar", } ``` would get rewritten as: ```json { 64: "foo", 32: "bar } ``` This is problematic since this isn't valid JSON as you cannot have non-string keys.	2020-10-18 00:10:41 +07:00
Sergio Livi	605535776a	[ustream] Add support for video.ibm.com (#26894 )	2020-10-17 23:14:46 +07:00
Felix Yan	1050e0d09f	[iqiyi] Fix typo (#26884 )	2020-10-17 23:02:17 +07:00
Sergey M․	d65d89183f	[expressen] Add support for di.se (closes #26670 )	2020-09-24 07:37:10 +07:00
Surkal	0c92f1e96b	[iprima] Improve video id extraction (#26507 ) (closes #26494 )	2020-09-24 06:46:58 +07:00
Sergey M․	adae9e844b	[README.md] Fix autonumber sequence description (refs #26686 )	2020-09-24 06:36:07 +07:00
Sergey M․	c5764b3f89	[downloader/http] Properly handle missing message in SSLError (closes #26646 )	2020-09-22 07:01:59 +07:00
Sergey M․	0837992a22	[downloader/http] Fix access to not yet opened stream in retry	2020-09-22 06:44:14 +07:00