Merge pull request #2567 from jaimeMF/sphinx-docs
authorPhilipp Hagemeister <phihag@phihag.de>
Sun, 23 Mar 2014 23:50:32 +0000 (00:50 +0100)
committerPhilipp Hagemeister <phihag@phihag.de>
Sun, 23 Mar 2014 23:50:32 +0000 (00:50 +0100)
Add initial sphinx docs

43 files changed:
README.md
devscripts/release.sh
setup.cfg [new file with mode: 0644]
test/helper.py
test/test_all_urls.py
test/test_download.py
test/test_playlists.py
youtube_dl/InfoExtractors.py [deleted file]
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/aol.py [new file with mode: 0644]
youtube_dl/extractor/arte.py
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/cspan.py
youtube_dl/extractor/daum.py
youtube_dl/extractor/engadget.py [new file with mode: 0644]
youtube_dl/extractor/fivemin.py [new file with mode: 0644]
youtube_dl/extractor/generic.py
youtube_dl/extractor/instagram.py
youtube_dl/extractor/iprima.py
youtube_dl/extractor/kontrtube.py
youtube_dl/extractor/metacafe.py
youtube_dl/extractor/ninegag.py
youtube_dl/extractor/ooyala.py
youtube_dl/extractor/parliamentliveuk.py [new file with mode: 0644]
youtube_dl/extractor/pbs.py
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/rutv.py [moved from youtube_dl/extractor/vgtrk.py with 50% similarity]
youtube_dl/extractor/ted.py
youtube_dl/extractor/toypics.py [new file with mode: 0644]
youtube_dl/extractor/udemy.py
youtube_dl/extractor/vesti.py [new file with mode: 0644]
youtube_dl/extractor/vevo.py
youtube_dl/extractor/videolecturesnet.py [new file with mode: 0644]
youtube_dl/extractor/viki.py
youtube_dl/extractor/worldstarhiphop.py
youtube_dl/extractor/xbef.py [new file with mode: 0644]
youtube_dl/extractor/xtube.py
youtube_dl/extractor/youporn.py
youtube_dl/extractor/youtube.py
youtube_dl/utils.py
youtube_dl/version.py

index ccd94b2dcd65e70dbb7e262650081b7f7f6e156f..a10b13055694243ba7245d426d5baffc20fb8aaf 100644 (file)
--- a/README.md
+++ b/README.md
@@ -36,6 +36,9 @@ which means you can modify it, redistribute it or use it however you like.
                                      an empty string (--proxy "") for direct
                                      connection
     --no-check-certificate           Suppress HTTPS certificate validation.
+    --prefer-insecure                Use an unencrypted connection to retrieve
+                                     information about the video. (Currently
+                                     supported only for YouTube)
     --cache-dir DIR                  Location in the filesystem where youtube-dl
                                      can store some downloaded information
                                      permanently. By default $XDG_CACHE_HOME
@@ -191,9 +194,9 @@ which means you can modify it, redistribute it or use it however you like.
                                      preference using slashes: "-f 22/17/18".
                                      "-f mp4" and "-f flv" are also supported.
                                      You can also use the special names "best",
-                                     "bestaudio", "worst", and "worstaudio". By
-                                     default, youtube-dl will pick the best
-                                     quality.
+                                     "bestvideo", "bestaudio", "worst",
+                                     "worstvideo" and "worstaudio". By default,
+                                     youtube-dl will pick the best quality.
     --all-formats                    download all available video formats
     --prefer-free-formats            prefer free video formats unless a specific
                                      one is requested
index 72e708c7f79c24f06797c4de10e6334ecf6bb9ec..aa3119c424556f2a14a88a2aef61c18b2b44c7bf 100755 (executable)
@@ -70,7 +70,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
 git checkout HEAD -- youtube-dl youtube-dl.exe
 
 /bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
-for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
+for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
 scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
 ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
 ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
@@ -97,7 +97,7 @@ rm -rf build
 
 make pypi-files
 echo "Uploading to PyPi ..."
-python setup.py sdist upload
+python setup.py sdist bdist_wheel upload
 make clean
 
 /bin/echo -e "\n### DONE!"
diff --git a/setup.cfg b/setup.cfg
new file mode 100644 (file)
index 0000000..e57d130
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,2 @@
+[wheel]
+universal = True
index b1f421ac58331bad23328502f42a0e1316df853d..8739f816c148729158bf9859a8569e9167846d7f 100644 (file)
@@ -9,7 +9,10 @@ import sys
 
 import youtube_dl.extractor
 from youtube_dl import YoutubeDL
-from youtube_dl.utils import preferredencoding
+from youtube_dl.utils import (
+    compat_str,
+    preferredencoding,
+)
 
 
 def get_params(override=None):
@@ -71,7 +74,7 @@ class FakeYDL(YoutubeDL):
             old_report_warning(message)
         self.report_warning = types.MethodType(report_warning, self)
 
-def get_testcases():
+def gettestcases():
     for ie in youtube_dl.extractor.gen_extractors():
         t = getattr(ie, '_TEST', None)
         if t:
@@ -83,3 +86,45 @@ def get_testcases():
 
 
 md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
+
+
+def expect_info_dict(self, expected_dict, got_dict):
+    for info_field, expected in expected_dict.items():
+        if isinstance(expected, compat_str) and expected.startswith('re:'):
+            got = got_dict.get(info_field)
+            match_str = expected[len('re:'):]
+            match_rex = re.compile(match_str)
+
+            self.assertTrue(
+                isinstance(got, compat_str) and match_rex.match(got),
+                u'field %s (value: %r) should match %r' % (info_field, got, match_str))
+        elif isinstance(expected, type):
+            got = got_dict.get(info_field)
+            self.assertTrue(isinstance(got, expected),
+                u'Expected type %r, but got value %r of type %r' % (expected, got, type(got)))
+        else:
+            if isinstance(expected, compat_str) and expected.startswith('md5:'):
+                got = 'md5:' + md5(got_dict.get(info_field))
+            else:
+                got = got_dict.get(info_field)
+            self.assertEqual(expected, got,
+                u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
+
+    # Check for the presence of mandatory fields
+    for key in ('id', 'url', 'title', 'ext'):
+        self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
+    # Check for mandatory fields that are automatically set by YoutubeDL
+    for key in ['webpage_url', 'extractor', 'extractor_key']:
+        self.assertTrue(got_dict.get(key), u'Missing field: %s' % key)
+
+    # Are checkable fields missing from the test case definition?
+    test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
+        for key, value in got_dict.items()
+        if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
+    missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
+    if missing_keys:
+        sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
+        self.assertFalse(
+            missing_keys,
+            'Missing keys in test definition: %s' % (
+                ', '.join(sorted(missing_keys))))
index 047e84f192d977a436c28f8caf599981e4b25b26..39ac8b8a1188746348dd1c15058ec71ccc75b769 100644 (file)
@@ -9,7 +9,7 @@ import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 
-from test.helper import get_testcases
+from test.helper import gettestcases
 
 from youtube_dl.extractor import (
     FacebookIE,
@@ -105,7 +105,7 @@ class TestAllURLsMatching(unittest.TestCase):
 
     def test_no_duplicates(self):
         ies = gen_extractors()
-        for tc in get_testcases():
+        for tc in gettestcases():
             url = tc['url']
             for ie in ies:
                 if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
@@ -141,6 +141,7 @@ class TestAllURLsMatching(unittest.TestCase):
     def test_pbs(self):
         # https://github.com/rg3/youtube-dl/issues/2350
         self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
+        self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
 
 if __name__ == '__main__':
     unittest.main()
index ca8c82f71d614021ed6cdf9894da856453dde380..f171c10bad84a876a9fe4caba2b71a984c3169ec 100644 (file)
@@ -8,17 +8,17 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from test.helper import (
     get_params,
-    get_testcases,
-    try_rm,
+    gettestcases,
+    expect_info_dict,
     md5,
-    report_warning
+    try_rm,
+    report_warning,
 )
 
 
 import hashlib
 import io
 import json
-import re
 import socket
 
 import youtube_dl.YoutubeDL
@@ -51,7 +51,7 @@ def _file_md5(fn):
     with open(fn, 'rb') as f:
         return hashlib.md5(f.read()).hexdigest()
 
-defs = get_testcases()
+defs = gettestcases()
 
 
 class TestDownload(unittest.TestCase):
@@ -135,40 +135,8 @@ def generator(test_case):
                     self.assertEqual(md5_for_file, tc['md5'])
                 with io.open(info_json_fn, encoding='utf-8') as infof:
                     info_dict = json.load(infof)
-                for (info_field, expected) in tc.get('info_dict', {}).items():
-                    if isinstance(expected, compat_str) and expected.startswith('re:'):
-                        got = info_dict.get(info_field)
-                        match_str = expected[len('re:'):]
-                        match_rex = re.compile(match_str)
-
-                        self.assertTrue(
-                            isinstance(got, compat_str) and match_rex.match(got),
-                            u'field %s (value: %r) should match %r' % (info_field, got, match_str))
-                    elif isinstance(expected, type):
-                        got = info_dict.get(info_field)
-                        self.assertTrue(isinstance(got, expected),
-                            u'Expected type %r, but got value %r of type %r' % (expected, got, type(got)))
-                    else:
-                        if isinstance(expected, compat_str) and expected.startswith('md5:'):
-                            got = 'md5:' + md5(info_dict.get(info_field))
-                        else:
-                            got = info_dict.get(info_field)
-                        self.assertEqual(expected, got,
-                            u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
-
-                # Check for the presence of mandatory fields
-                for key in ('id', 'url', 'title', 'ext'):
-                    self.assertTrue(key in info_dict.keys() and info_dict[key])
-                # Check for mandatory fields that are automatically set by YoutubeDL
-                for key in ['webpage_url', 'extractor', 'extractor_key']:
-                    self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
-
-                # If checkable fields are missing from the test case, print the info_dict
-                test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
-                    for key, value in info_dict.items()
-                    if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
-                if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()):
-                    sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
+
+                expect_info_dict(self, tc.get('info_dict', {}), info_dict)
         finally:
             try_rm_tcs_files()
 
index fbeed1c8cde5c02f100ca50cbe42e20203fe0155..b1e38e7e9ef29d4fa5bbabcecabec26bfce4bad6 100644 (file)
@@ -9,8 +9,10 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from test.helper import FakeYDL
-
+from test.helper import (
+    expect_info_dict,
+    FakeYDL,
+)
 
 from youtube_dl.extractor import (
     AcademicEarthCourseIE,
@@ -37,6 +39,9 @@ from youtube_dl.extractor import (
     GoogleSearchIE,
     GenericIE,
     TEDIE,
+    ToypicsUserIE,
+    XTubeUserIE,
+    InstagramUserIE,
 )
 
 
@@ -269,5 +274,44 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], 'Who are the hackers?')
         self.assertTrue(len(result['entries']) >= 6)
 
+    def test_toypics_user(self):
+        dl = FakeYDL()
+        ie = ToypicsUserIE(dl)
+        result = ie.extract('http://videos.toypics.net/Mikey')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'Mikey')
+        self.assertTrue(len(result['entries']) >= 17)
+
+    def test_xtube_user(self):
+        dl = FakeYDL()
+        ie = XTubeUserIE(dl)
+        result = ie.extract('http://www.xtube.com/community/profile.php?user=greenshowers')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'greenshowers')
+        self.assertTrue(len(result['entries']) >= 155)
+
+    def test_InstagramUser(self):
+        dl = FakeYDL()
+        ie = InstagramUserIE(dl)
+        result = ie.extract('http://instagram.com/porsche')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'porsche')
+        self.assertTrue(len(result['entries']) >= 2)
+        test_video = next(
+            e for e in result['entries']
+            if e['id'] == '614605558512799803_462752227')
+        dl.add_default_extra_info(test_video, ie, '(irrelevant URL)')
+        dl.process_video_result(test_video, download=False)
+        EXPECTED = {
+            'id': '614605558512799803_462752227',
+            'ext': 'mp4',
+            'title': '#Porsche Intelligent Performance.',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'uploader': 'Porsche',
+            'uploader_id': 'porsche',
+        }
+        expect_info_dict(self, EXPECTED, test_video)
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
deleted file mode 100755 (executable)
index 672ef9e..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-# Legacy file for backwards compatibility, use youtube_dl.extractor instead!
-
-from .extractor.common import InfoExtractor, SearchInfoExtractor
-from .extractor import gen_extractors, get_info_extractor
index 5095f87d29b36287543a0dac86dcf4a50efb68ab..d18d6dd00e57e6eb7d8c5213a4b6d46ffb65df13 100644 (file)
@@ -148,6 +148,8 @@ class YoutubeDL(object):
                        again.
     cookiefile:        File name where cookies should be read from and dumped to.
     nocheckcertificate:Do not verify SSL certificates
+    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
+                       At the moment, this is only supported by YouTube.
     proxy:             URL of the proxy server to use
     socket_timeout:    Time to wait for unresponsive hosts, in seconds
     bidi_workaround:   Work around buggy terminals without bidirectional text
@@ -510,13 +512,7 @@ class YoutubeDL(object):
                         '_type': 'compat_list',
                         'entries': ie_result,
                     }
-                self.add_extra_info(ie_result,
-                    {
-                        'extractor': ie.IE_NAME,
-                        'webpage_url': url,
-                        'webpage_url_basename': url_basename(url),
-                        'extractor_key': ie.ie_key(),
-                    })
+                self.add_default_extra_info(ie_result, ie, url)
                 if process:
                     return self.process_ie_result(ie_result, download, extra_info)
                 else:
@@ -533,7 +529,15 @@ class YoutubeDL(object):
                 else:
                     raise
         else:
-            self.report_error('no suitable InfoExtractor: %s' % url)
+            self.report_error('no suitable InfoExtractor for URL %s' % url)
+
+    def add_default_extra_info(self, ie_result, ie, url):
+        self.add_extra_info(ie_result, {
+            'extractor': ie.IE_NAME,
+            'webpage_url': url,
+            'webpage_url_basename': url_basename(url),
+            'extractor_key': ie.ie_key(),
+        })
 
     def process_ie_result(self, ie_result, download=True, extra_info={}):
         """
index 0e9504c1485cdc4a6ba09968f1331615f0b8942f..a4cbdb0bdbea7ad238fad1e828a14a106b9cdcd4 100644 (file)
@@ -56,7 +56,6 @@ __authors__  = (
 __license__ = 'Public Domain'
 
 import codecs
-import getpass
 import io
 import locale
 import optparse
@@ -68,6 +67,7 @@ import sys
 
 
 from .utils import (
+    compat_getpass,
     compat_print,
     DateRange,
     decodeOption,
@@ -237,6 +237,9 @@ def parseOpts(overrideArguments=None):
         '--proxy', dest='proxy', default=None, metavar='URL',
         help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
     general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
+    general.add_option(
+        '--prefer-insecure', action='store_true', dest='prefer_insecure',
+        help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
     general.add_option(
         '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
         help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
@@ -257,7 +260,6 @@ def parseOpts(overrideArguments=None):
         action='store_true',
         help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
 
-
     selection.add_option(
         '--playlist-start',
         dest='playliststart', metavar='NUMBER', default=1, type=int,
@@ -611,7 +613,7 @@ def _real_main(argv=None):
     if opts.usetitle and opts.useid:
         parser.error(u'using title conflicts with using video ID')
     if opts.username is not None and opts.password is None:
-        opts.password = getpass.getpass(u'Type account password and press return:')
+        opts.password = compat_getpass(u'Type account password and press [Return]: ')
     if opts.ratelimit is not None:
         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
         if numeric_limit is None:
@@ -756,6 +758,7 @@ def _real_main(argv=None):
         'download_archive': download_archive_fn,
         'cookiefile': opts.cookiefile,
         'nocheckcertificate': opts.no_check_certificate,
+        'prefer_insecure': opts.prefer_insecure,
         'proxy': opts.proxy,
         'socket_timeout': opts.socket_timeout,
         'bidi_workaround': opts.bidi_workaround,
index 313414e7d9c667fde3260afdb69b39486f2079d0..3e728e87606f5e08bb94a1a0fa36735b35cc3796 100644 (file)
@@ -2,6 +2,7 @@ from .academicearth import AcademicEarthCourseIE
 from .addanime import AddAnimeIE
 from .aftonbladet import AftonbladetIE
 from .anitube import AnitubeIE
+from .aol import AolIE
 from .aparat import AparatIE
 from .appletrailers import AppleTrailersIE
 from .archiveorg import ArchiveOrgIE
@@ -10,6 +11,7 @@ from .arte import (
     ArteTvIE,
     ArteTVPlus7IE,
     ArteTVCreativeIE,
+    ArteTVConcertIE,
     ArteTVFutureIE,
     ArteTVDDCIE,
 )
@@ -63,6 +65,7 @@ from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .eitb import EitbIE
 from .elpais import ElPaisIE
+from .engadget import EngadgetIE
 from .escapist import EscapistIE
 from .everyonesmixtape import EveryonesMixtapeIE
 from .exfm import ExfmIE
@@ -71,6 +74,7 @@ from .facebook import FacebookIE
 from .faz import FazIE
 from .firstpost import FirstpostIE
 from .firsttv import FirstTVIE
+from .fivemin import FiveMinIE
 from .fktv import (
     FKTVIE,
     FKTVPosteckeIE,
@@ -108,7 +112,7 @@ from .imdb import (
 )
 from .ina import InaIE
 from .infoq import InfoQIE
-from .instagram import InstagramIE
+from .instagram import InstagramIE, InstagramUserIE
 from .internetvideoarchive import InternetVideoArchiveIE
 from .iprima import IPrimaIE
 from .ivi import (
@@ -173,6 +177,7 @@ from .nowness import NownessIE
 from .nowvideo import NowVideoIE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
+from .parliamentliveuk import ParliamentLiveUKIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
 from .playvid import PlayvidIE
@@ -196,6 +201,7 @@ from .rutube import (
     RutubeMovieIE,
     RutubePersonIE,
 )
+from .rutv import RUTVIE
 from .savefrom import SaveFromIE
 from .servingsys import ServingSysIE
 from .sina import SinaIE
@@ -233,6 +239,7 @@ from .theplatform import ThePlatformIE
 from .thisav import ThisAVIE
 from .tinypic import TinyPicIE
 from .toutv import TouTvIE
+from .toypics import ToypicsUserIE, ToypicsIE
 from .traileraddict import TrailerAddictIE
 from .trilulilu import TriluliluIE
 from .trutube import TruTubeIE
@@ -251,12 +258,13 @@ from .ustream import UstreamIE, UstreamChannelIE
 from .vbox7 import Vbox7IE
 from .veehd import VeeHDIE
 from .veoh import VeohIE
+from .vesti import VestiIE
 from .vevo import VevoIE
-from .vgtrk import VGTRKIE
 from .vice import ViceIE
 from .viddler import ViddlerIE
 from .videobam import VideoBamIE
 from .videodetective import VideoDetectiveIE
+from .videolecturesnet import VideoLecturesNetIE
 from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE
 from .vimeo import (
@@ -277,10 +285,11 @@ from .weibo import WeiboIE
 from .wimp import WimpIE
 from .wistia import WistiaIE
 from .worldstarhiphop import WorldStarHipHopIE
+from .xbef import XBefIE
 from .xhamster import XHamsterIE
 from .xnxx import XNXXIE
 from .xvideos import XVideosIE
-from .xtube import XTubeIE
+from .xtube import XTubeUserIE, XTubeIE
 from .yahoo import (
     YahooIE,
     YahooNewsIE,
diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py
new file mode 100644 (file)
index 0000000..abc6689
--- /dev/null
@@ -0,0 +1,28 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .fivemin import FiveMinIE
+
+
+class AolIE(InfoExtractor):
+    IE_NAME = 'on.aol.com'
+    _VALID_URL = r'http://on\.aol\.com/video/.*-(?P<id>\d+)($|\?)'
+
+    _TEST = {
+        'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
+        'md5': '18ef68f48740e86ae94b98da815eec42',
+        'info_dict': {
+            'id': '518167793',
+            'ext': 'mp4',
+            'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
+        },
+        'add_ie': ['FiveMin'],
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        self.to_screen('Downloading 5min.com video %s' % video_id)
+        return FiveMinIE._build_result(video_id)
index d194f25643f2e26839686f372b404293db831ae2..979481b2198134859bfbdf79302e189c4ad363fc 100644 (file)
@@ -131,7 +131,7 @@ class ArteTvIE(InfoExtractor):
 
 class ArteTVPlus7IE(InfoExtractor):
     IE_NAME = 'arte.tv:+7'
-    _VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
+    _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
 
     @classmethod
     def _extract_url_info(cls, url):
@@ -202,6 +202,8 @@ class ArteTVPlus7IE(InfoExtractor):
                     re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
                     # The version with sourds/mal subtitles has also lower relevance
                     re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
+                    # Prefer http downloads over m3u8
+                    0 if f['url'].endswith('m3u8') else 1,
                 )
         formats = sorted(formats, key=sort_key)
         def _format(format_info):
@@ -242,8 +244,9 @@ class ArteTVCreativeIE(ArteTVPlus7IE):
 
     _TEST = {
         'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
-        'file': '050489-002.mp4',
         'info_dict': {
+            'id': '050489-002',
+            'ext': 'mp4',
             'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design',
         },
     }
@@ -255,8 +258,9 @@ class ArteTVFutureIE(ArteTVPlus7IE):
 
     _TEST = {
         'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
-        'file': '050940-003.mp4',
         'info_dict': {
+            'id': '050940-003',
+            'ext': 'mp4',
             'title': 'Les champignons au secours de la planète',
         },
     }
@@ -270,7 +274,7 @@ class ArteTVFutureIE(ArteTVPlus7IE):
 
 class ArteTVDDCIE(ArteTVPlus7IE):
     IE_NAME = 'arte.tv:ddc'
-    _VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
+    _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
 
     def _real_extract(self, url):
         video_id, lang = self._extract_url_info(url)
@@ -284,3 +288,20 @@ class ArteTVDDCIE(ArteTVPlus7IE):
         javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator')
         json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url')
         return self._extract_from_json_url(json_url, video_id, lang)
+
+
+class ArteTVConcertIE(ArteTVPlus7IE):
+    IE_NAME = 'arte.tv:concert'
+    _VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>de|fr)/(?P<id>.+)'
+
+    _TEST = {
+        'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
+        'md5': '9ea035b7bd69696b67aa2ccaaa218161',
+        'info_dict': {
+            'id': '186',
+            'ext': 'mp4',
+            'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"',
+            'upload_date': '20140128',
+            'description': 'md5:486eb08f991552ade77439fe6d82c305',
+        },
+    }
index ed3986f313a149f0db4a69dc92762730297ced1a..d50fcdbdbb0fc23becdf6a254769667da44cb9c4 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 
 class ComedyCentralIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'''(?x)https?://(?:www\.)?comedycentral\.com/
+    _VALID_URL = r'''(?x)https?://(?:www\.)?(comedycentral|cc)\.com/
         (video-clips|episodes|cc-studios|video-collections)
         /(?P<title>.*)'''
     _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
index d65046f588d0bf4481ec6f8d8de7e031e6bdb2f9..2a8eda9eff3ce9364a3e8702c7422cb364dab582 100644 (file)
@@ -10,9 +10,9 @@ from ..utils import (
 
 
 class CSpanIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>\d+)'
+    _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
     IE_DESC = 'C-SPAN'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
         'md5': '8e44ce11f0f725527daccc453f553eb0',
         'info_dict': {
@@ -22,13 +22,24 @@ class CSpanIE(InfoExtractor):
             'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
         },
         'skip': 'Regularly fails on travis, for unknown reasons',
-    }
+    }, {
+        'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
+        # For whatever reason, the served video alternates between
+        # two different ones
+        #'md5': 'dbb0f047376d457f2ab8b3929cbb2d0c',
+        'info_dict': {
+            'id': '340723',
+            'ext': 'mp4',
+            'title': 'International Health Care Models',
+            'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
+        }
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         page_id = mobj.group('id')
         webpage = self._download_webpage(url, page_id)
-        video_id = self._search_regex(r'data-progid=\'(\d+)\'>', webpage, 'video id')
+        video_id = self._search_regex(r'progid=\'?([0-9]+)\'?>', webpage, 'video id')
 
         description = self._html_search_regex(
             [
index 4876ecb4812710e2509eec8fc19f00dac60d2fde..6033cd94a1b251d66e7a3f80034bc58b79fa4b55 100644 (file)
@@ -1,25 +1,28 @@
 # encoding: utf-8
+
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
 from ..utils import (
     compat_urllib_parse,
-    determine_ext,
 )
 
 
 class DaumIE(InfoExtractor):
     _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
-    IE_NAME = u'daum.net'
+    IE_NAME = 'daum.net'
 
     _TEST = {
-        u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
-        u'file': u'52554690.mp4',
-        u'info_dict': {
-            u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
-            u'description': u'DOTA 2GETHER 시즌2 6회 - 2부',
-            u'upload_date': u'20130831',
-            u'duration': 3868,
+        'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
+        'info_dict': {
+            'id': '52554690',
+            'ext': 'mp4',
+            'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
+            'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
+            'upload_date': '20130831',
+            'duration': 3868,
         },
     }
 
@@ -30,14 +33,14 @@ class DaumIE(InfoExtractor):
         webpage = self._download_webpage(canonical_url, video_id)
         full_id = self._search_regex(
             r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
-            webpage, u'full id')
+            webpage, 'full id')
         query = compat_urllib_parse.urlencode({'vid': full_id})
         info = self._download_xml(
             'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
-            u'Downloading video info')
+            'Downloading video info')
         urls = self._download_xml(
             'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
-            video_id, u'Downloading video formats info')
+            video_id, 'Downloading video formats info')
 
         self.to_screen(u'%s: Getting video urls' % video_id)
         formats = []
@@ -53,7 +56,6 @@ class DaumIE(InfoExtractor):
             format_url = url_doc.find('result/url').text
             formats.append({
                 'url': format_url,
-                'ext': determine_ext(format_url),
                 'format_id': profile,
             })
 
diff --git a/youtube_dl/extractor/engadget.py b/youtube_dl/extractor/engadget.py
new file mode 100644 (file)
index 0000000..92ada81
--- /dev/null
@@ -0,0 +1,43 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .fivemin import FiveMinIE
+from ..utils import (
+    url_basename,
+)
+
+
+class EngadgetIE(InfoExtractor):
+    _VALID_URL = r'''(?x)https?://www.engadget.com/
+        (?:video/5min/(?P<id>\d+)|
+            [\d/]+/.*?)
+        '''
+
+    _TEST = {
+        'url': 'http://www.engadget.com/video/5min/518153925/',
+        'md5': 'c6820d4828a5064447a4d9fc73f312c9',
+        'info_dict': {
+            'id': '518153925',
+            'ext': 'mp4',
+            'title': 'Samsung Galaxy Tab Pro 8.4 Review',
+        },
+        'add_ie': ['FiveMin'],
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        if video_id is not None:
+            return FiveMinIE._build_result(video_id)
+        else:
+            title = url_basename(url)
+            webpage = self._download_webpage(url, title)
+            ids = re.findall(r'<iframe[^>]+?playList=(\d+)', webpage)
+            return {
+                '_type': 'playlist',
+                'title': title,
+                'entries': [FiveMinIE._build_result(id) for id in ids]
+            }
diff --git a/youtube_dl/extractor/fivemin.py b/youtube_dl/extractor/fivemin.py
new file mode 100644 (file)
index 0000000..215cc83
--- /dev/null
@@ -0,0 +1,56 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_str,
+)
+
+
+class FiveMinIE(InfoExtractor):
+    IE_NAME = '5min'
+    _VALID_URL = r'''(?x)
+        (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(.*?&)?playList=|
+            5min:)
+        (?P<id>\d+)
+        '''
+
+    _TEST = {
+        # From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
+        'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
+        'md5': '4f7b0b79bf1a470e5004f7112385941d',
+        'info_dict': {
+            'id': '518013791',
+            'ext': 'mp4',
+            'title': 'iPad Mini with Retina Display Review',
+        },
+    }
+
+    @classmethod
+    def _build_result(cls, video_id):
+        return cls.url_result('5min:%s' % video_id, cls.ie_key())
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        info = self._download_json(
+            'https://syn.5min.com/handlers/SenseHandler.ashx?func=GetResults&'
+            'playlist=%s&url=https' % video_id,
+            video_id)['binding'][0]
+
+        second_id = compat_str(int(video_id[:-2]) + 1)
+        formats = []
+        for quality, height in [(1, 320), (2, 480), (4, 720), (8, 1080)]:
+            if any(r['ID'] == quality for r in info['Renditions']):
+                formats.append({
+                    'format_id': compat_str(quality),
+                    'url': 'http://avideos.5min.com/%s/%s/%s_%s.mp4' % (second_id[-3:], second_id, video_id, quality),
+                    'height': height,
+                })
+
+        return {
+            'id': video_id,
+            'title': info['Title'],
+            'formats': formats,
+        }
index 6e632477921c17bfd0dff837074b72d4ae83abed..4d649fe717c2afa9ff2040b0c0b6cd62ce4285bb 100644 (file)
@@ -24,6 +24,7 @@ from ..utils import (
 )
 from .brightcove import BrightcoveIE
 from .ooyala import OoyalaIE
+from .rutv import RUTVIE
 
 
 class GenericIE(InfoExtractor):
@@ -101,6 +102,20 @@ class GenericIE(InfoExtractor):
                 'title': '2cc213299525360.mov',  # that's what we get
             },
         },
+        # second style of embedded ooyala videos
+        {
+            'url': 'http://www.smh.com.au/tv/business/show/financial-review-sunday/behind-the-scenes-financial-review-sunday--4350201.html',
+            'info_dict': {
+                'id': '13djJjYjptA1XpPx8r9kuzPyj3UZH0Uk',
+                'ext': 'mp4',
+                'title': 'Behind-the-scenes: Financial Review Sunday ',
+                'description': 'Step inside Channel Nine studios for an exclusive tour of its upcoming financial business show.',
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
         # google redirect
         {
             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@@ -143,8 +158,45 @@ class GenericIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
+            },
+        },
+        # RUTV embed
+        {
+            'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
+            'info_dict': {
+                'id': '776940',
+                'ext': 'mp4',
+                'title': 'Охотское море стало целиком российским',
+                'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        # Embedded TED video
+        {
+            'url': 'http://en.support.wordpress.com/videos/ted-talks/',
+            'md5': 'deeeabcc1085eb2ba205474e7235a3d5',
+            'info_dict': {
+                'id': '981',
+                'ext': 'mp4',
+                'title': 'My web playroom',
+                'uploader': 'Ze Frank',
+                'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
             }
         },
+        # nowvideo embed hidden behind percent encoding
+        {
+            'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
+            'md5': '2baf4ddd70f697d94b1c18cf796d5107',
+            'info_dict': {
+                'id': '06e53103ca9aa',
+                'ext': 'flv',
+                'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
+                'description': 'No description',
+            },
+        },
     ]
 
     def report_download_webpage(self, video_id):
@@ -170,9 +222,14 @@ class GenericIE(InfoExtractor):
                     newurl = newurl.replace(' ', '%20')
                     newheaders = dict((k,v) for k,v in req.headers.items()
                                       if k.lower() not in ("content-length", "content-type"))
+                    try:
+                        # This function was deprecated in python 3.3 and removed in 3.4
+                        origin_req_host = req.get_origin_req_host()
+                    except AttributeError:
+                        origin_req_host = req.origin_req_host
                     return HEADRequest(newurl,
                                        headers=newheaders,
-                                       origin_req_host=req.get_origin_req_host(),
+                                       origin_req_host=origin_req_host,
                                        unverifiable=True)
                 else:
                     raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp)
@@ -291,6 +348,11 @@ class GenericIE(InfoExtractor):
         except compat_xml_parse_error:
             pass
 
+        # Sometimes embedded video player is hidden behind percent encoding
+        # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
+        # Unescaping the whole page allows to handle those cases in a generic way
+        webpage = compat_urllib_parse.unquote(webpage)
+
         # it's tempting to parse this further, but you would
         # have to take into account all the variations like
         #   Video Title - Site Name
@@ -324,9 +386,9 @@ class GenericIE(InfoExtractor):
 
         # Look for embedded (iframe) Vimeo player
         mobj = re.search(
-            r'<iframe[^>]+?src="((?:https?:)?//player\.vimeo\.com/video/.+?)"', webpage)
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
         if mobj:
-            player_url = unescapeHTML(mobj.group(1))
+            player_url = unescapeHTML(mobj.group('url'))
             surl = smuggle_url(player_url, {'Referer': url})
             return self.url_result(surl, 'Vimeo')
 
@@ -392,9 +454,10 @@ class GenericIE(InfoExtractor):
             return self.url_result(mobj.group('url'))
 
         # Look for Ooyala videos
-        mobj = re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=([^"&]+)', webpage)
+        mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
+             re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
         if mobj is not None:
-            return OoyalaIE._build_url_result(mobj.group(1))
+            return OoyalaIE._build_url_result(mobj.group('ec'))
 
         # Look for Aparat videos
         mobj = re.search(r'<iframe src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
@@ -451,6 +514,17 @@ class GenericIE(InfoExtractor):
             return self.playlist_result(
                 urlrs, playlist_id=video_id, playlist_title=video_title)
 
+        # Look for embedded RUTV player
+        rutv_url = RUTVIE._extract_url(webpage)
+        if rutv_url:
+            return self.url_result(rutv_url, 'RUTV')
+
+        # Look for embedded TED player
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'TED')
+
         # Start with something easy: JW Player in SWFObject
         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
         if mobj is None:
@@ -462,6 +536,7 @@ class GenericIE(InfoExtractor):
         if mobj is None:
             # Broaden the search a little bit: JWPlayer JS loader
             mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
+
         if mobj is None:
             # Try to find twitter cards info
             mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
index 63141af272ac077ed97dcd5baf4c5a0dcb7d3b47..994f0e4aefa7f52bf8ba2273ab1426958a2f830e 100644 (file)
@@ -3,6 +3,9 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+)
 
 
 class InstagramIE(InfoExtractor):
@@ -37,3 +40,68 @@ class InstagramIE(InfoExtractor):
             'uploader_id': uploader_id,
             'description': desc,
         }
+
+
+class InstagramUserIE(InfoExtractor):
+    _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
+    IE_DESC = 'Instagram user profile'
+    IE_NAME = 'instagram:user'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        uploader_id = mobj.group('username')
+
+        entries = []
+        page_count = 0
+        media_url = 'http://instagram.com/%s/media' % uploader_id
+        while True:
+            page = self._download_json(
+                media_url, uploader_id,
+                note='Downloading page %d ' % (page_count + 1),
+            )
+            page_count += 1
+
+            for it in page['items']:
+                if it.get('type') != 'video':
+                    continue
+                like_count = int_or_none(it.get('likes', {}).get('count'))
+                user = it.get('user', {})
+
+                formats = [{
+                    'format_id': k,
+                    'height': v.get('height'),
+                    'width': v.get('width'),
+                    'url': v['url'],
+                } for k, v in it['videos'].items()]
+                self._sort_formats(formats)
+
+                thumbnails_el = it.get('images', {})
+                thumbnail = thumbnails_el.get('thumbnail', {}).get('url')
+
+                title = it.get('caption', {}).get('text', it['id'])
+
+                entries.append({
+                    'id': it['id'],
+                    'title': title,
+                    'formats': formats,
+                    'thumbnail': thumbnail,
+                    'webpage_url': it.get('link'),
+                    'uploader': user.get('full_name'),
+                    'uploader_id': user.get('username'),
+                    'like_count': like_count,
+                    'upload_timestamp': int_or_none(it.get('created_time')),
+                })
+
+            if not page['items']:
+                break
+            max_id = page['items'][-1]['id']
+            media_url = (
+                'http://instagram.com/%s/media?max_id=%s' % (
+                    uploader_id, max_id))
+
+        return {
+            '_type': 'playlist',
+            'entries': entries,
+            'id': uploader_id,
+            'title': uploader_id,
+        }
index 2a29e6072bc78b0afe5fe2460cd9d1ea45565203..d1defd363c5fe9c86330236f53b8aa21bfe65a38 100644 (file)
@@ -48,7 +48,7 @@ class IPrimaIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        if re.search(r'Nemáte oprávnění přistupovat na tuto stránku.\s*</div>', webpage):
+        if re.search(r'Nemáte oprávnění přistupovat na tuto stránku\.\s*</div>', webpage):
             raise ExtractorError(
                 '%s said: You do not have permission to access this page' % self.IE_NAME, expected=True)
 
index 1b45b67b0579d9fb06462f587651bc8f83e4751d..5341ac773f79fe237626bdfe3243bd1561d8003d 100644 (file)
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import int_or_none
 
 
 class KontrTubeIE(InfoExtractor):
@@ -32,27 +33,26 @@ class KontrTubeIE(InfoExtractor):
 
         video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
         thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
-        title = self._html_search_regex(r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage,
-            'video title')
+        title = self._html_search_regex(
+            r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage, 'video title')
         description = self._html_search_meta('description', webpage, 'video description')
 
-        mobj = re.search(r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
-            webpage)
+        mobj = re.search(
+            r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', webpage)
         duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
 
-        view_count = self._html_search_regex(r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage,
-            'view count', fatal=False)
-        view_count = int(view_count) if view_count is not None else None
+        view_count = self._html_search_regex(
+            r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, 'view count', fatal=False)
 
         comment_count = None
-        comment_str = self._html_search_regex(r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count',
-            fatal=False)
+        comment_str = self._html_search_regex(
+            r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', fatal=False)
         if comment_str.startswith('комментариев нет'):
             comment_count = 0
         else:
             mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
             if mobj:
-                comment_count = int(mobj.group('total'))
+                comment_count = mobj.group('total')
 
         return {
             'id': video_id,
@@ -61,6 +61,6 @@ class KontrTubeIE(InfoExtractor):
             'title': title,
             'description': description,
             'duration': duration,
-            'view_count': view_count,
-            'comment_count': comment_count,
+            'view_count': int_or_none(view_count),
+            'comment_count': int_or_none(comment_count),
         }
\ No newline at end of file
index 30103119785ed9ca530fc7fe2904603f5f64a450..6436c05a3cd8e3f25499b9ff911de837a6c98207 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
@@ -9,104 +11,103 @@ from ..utils import (
     ExtractorError,
 )
 
-class MetacafeIE(InfoExtractor):
-    """Information Extractor for metacafe.com."""
 
-    _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
+class MetacafeIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
     _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
     _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
-    IE_NAME = u'metacafe'
+    IE_NAME = 'metacafe'
     _TESTS = [
-    # Youtube video
-    {
-        u"add_ie": ["Youtube"],
-        u"url":  u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
-        u"file":  u"_aUehQsCQtM.mp4",
-        u"info_dict": {
-            u"upload_date": u"20090102",
-            u"title": u"The Electric Company | \"Short I\" | PBS KIDS GO!",
-            u"description": u"md5:2439a8ef6d5a70e380c22f5ad323e5a8",
-            u"uploader": u"PBS",
-            u"uploader_id": u"PBS"
-        }
-    },
-    # Normal metacafe video
-    {
-        u'url': u'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/',
-        u'md5': u'6e0bca200eaad2552e6915ed6fd4d9ad',
-        u'info_dict': {
-            u'id': u'11121940',
-            u'ext': u'mp4',
-            u'title': u'News: Stuff You Won\'t Do with Your PlayStation 4',
-            u'uploader': u'ign',
-            u'description': u'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
+        # Youtube video
+        {
+            'add_ie': ['Youtube'],
+            'url':  'http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/',
+            'info_dict': {
+                'id': '_aUehQsCQtM',
+                'ext': 'mp4',
+                'upload_date': '20090102',
+                'title': 'The Electric Company | "Short I" | PBS KIDS GO!',
+                'description': 'md5:2439a8ef6d5a70e380c22f5ad323e5a8',
+                'uploader': 'PBS',
+                'uploader_id': 'PBS'
+            }
         },
-    },
-    # AnyClip video
-    {
-        u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/",
-        u"file": u"an-dVVXnuY7Jh77J.mp4",
-        u"info_dict": {
-            u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3",
-            u"uploader": u"anyclip",
-            u"description": u"md5:38c711dd98f5bb87acf973d573442e67",
+        # Normal metacafe video
+        {
+            'url': 'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/',
+            'md5': '6e0bca200eaad2552e6915ed6fd4d9ad',
+            'info_dict': {
+                'id': '11121940',
+                'ext': 'mp4',
+                'title': 'News: Stuff You Won\'t Do with Your PlayStation 4',
+                'uploader': 'ign',
+                'description': 'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
+            },
         },
-    },
-    # age-restricted video
-    {
-        u'url': u'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/',
-        u'md5': u'98dde7c1a35d02178e8ab7560fe8bd09',
-        u'info_dict': {
-            u'id': u'5186653',
-            u'ext': u'mp4',
-            u'title': u'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.',
-            u'uploader': u'Dwayne Pipe',
-            u'description': u'md5:950bf4c581e2c059911fa3ffbe377e4b',
-            u'age_limit': 18,
+        # AnyClip video
+        {
+            'url': 'http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/',
+            'info_dict': {
+                'id': 'an-dVVXnuY7Jh77J',
+                'ext': 'mp4',
+                'title': 'The Andromeda Strain (1971): Stop the Bomb Part 3',
+                'uploader': 'anyclip',
+                'description': 'md5:38c711dd98f5bb87acf973d573442e67',
+            },
         },
-    },
-    # cbs video
-    {
-        u'url': u'http://www.metacafe.com/watch/cb-0rOxMBabDXN6/samsung_galaxy_note_2_samsungs_next_generation_phablet/',
-        u'info_dict': {
-            u'id': u'0rOxMBabDXN6',
-            u'ext': u'flv',
-            u'title': u'Samsung Galaxy Note 2: Samsung\'s next-generation phablet',
-            u'description': u'md5:54d49fac53d26d5a0aaeccd061ada09d',
-            u'duration': 129,
+        # age-restricted video
+        {
+            'url': 'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/',
+            'md5': '98dde7c1a35d02178e8ab7560fe8bd09',
+            'info_dict': {
+                'id': '5186653',
+                'ext': 'mp4',
+                'title': 'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.',
+                'uploader': 'Dwayne Pipe',
+                'description': 'md5:950bf4c581e2c059911fa3ffbe377e4b',
+                'age_limit': 18,
+            },
         },
-        u'params': {
-            # rtmp download
-            u'skip_download': True,
+        # cbs video
+        {
+            'url': 'http://www.metacafe.com/watch/cb-8VD4r_Zws8VP/open_this_is_face_the_nation_february_9/',
+            'info_dict': {
+                'id': '8VD4r_Zws8VP',
+                'ext': 'flv',
+                'title': 'Open: This is Face the Nation, February 9',
+                'description': 'md5:8a9ceec26d1f7ed6eab610834cc1a476',
+                'duration': 96,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
         },
-    },
     ]
 
-
     def report_disclaimer(self):
-        """Report disclaimer retrieval."""
-        self.to_screen(u'Retrieving disclaimer')
+        self.to_screen('Retrieving disclaimer')
 
     def _real_initialize(self):
         # Retrieve disclaimer
         self.report_disclaimer()
-        self._download_webpage(self._DISCLAIMER, None, False, u'Unable to retrieve disclaimer')
+        self._download_webpage(self._DISCLAIMER, None, False, 'Unable to retrieve disclaimer')
 
         # Confirm age
         disclaimer_form = {
             'filters': '0',
             'submit': "Continue - I'm over 18",
-            }
+        }
         request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         self.report_age_confirmation()
-        self._download_webpage(request, None, False, u'Unable to confirm age')
+        self._download_webpage(request, None, False, 'Unable to confirm age')
 
     def _real_extract(self, url):
         # Extract id and simplified title from URL
         mobj = re.match(self._VALID_URL, url)
         if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError('Invalid URL: %s' % url)
 
         video_id = mobj.group(1)
 
@@ -153,23 +154,24 @@ class MetacafeIE(InfoExtractor):
             else:
                 mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
                 if mobj is None:
-                    raise ExtractorError(u'Unable to extract media URL')
+                    raise ExtractorError('Unable to extract media URL')
                 vardict = compat_parse_qs(mobj.group(1))
                 if 'mediaData' not in vardict:
-                    raise ExtractorError(u'Unable to extract media URL')
-                mobj = re.search(r'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"', vardict['mediaData'][0])
+                    raise ExtractorError('Unable to extract media URL')
+                mobj = re.search(
+                    r'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"', vardict['mediaData'][0])
                 if mobj is None:
-                    raise ExtractorError(u'Unable to extract media URL')
+                    raise ExtractorError('Unable to extract media URL')
                 mediaURL = mobj.group('mediaURL').replace('\\/', '/')
                 video_url = '%s?__gda__=%s' % (mediaURL, mobj.group('key'))
                 video_ext = determine_ext(video_url)
 
-        video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
+        video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, 'title')
         description = self._og_search_description(webpage)
         thumbnail = self._og_search_thumbnail(webpage)
         video_uploader = self._html_search_regex(
                 r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
-                webpage, u'uploader nickname', fatal=False)
+                webpage, 'uploader nickname', fatal=False)
 
         if re.search(r'"contentRating":"restricted"', webpage) is not None:
             age_limit = 18
@@ -177,14 +179,12 @@ class MetacafeIE(InfoExtractor):
             age_limit = 0
 
         return {
-            '_type':    'video',
-            'id':       video_id,
-            'url':      video_url,
+            'id': video_id,
+            'url': video_url,
             'description': description,
             'uploader': video_uploader,
-            'upload_date':  None,
-            'title':    video_title,
+            'title': video_title,
             'thumbnail':thumbnail,
-            'ext':      video_ext,
+            'ext': video_ext,
             'age_limit': age_limit,
         }
index 1d7aa40ed7f1be0ac0348b18f64fd046fc08204e..b8c892ccefc156a5a28b945384553af70e67a909 100644 (file)
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 
-import json
 import re
 
 from .common import InfoExtractor
@@ -12,8 +11,9 @@ class NineGagIE(InfoExtractor):
 
     _TEST = {
         "url": "http://9gag.tv/v/1912",
-        "file": "1912.mp4",
         "info_dict": {
+            "id": "1912",
+            "ext": "mp4",
             "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
             "title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
             "view_count": int,
index 44312ba4ecf61220ad21e8d233a40e99960389b2..e20327791c748617363430dee6dd8803f73bda30 100644 (file)
@@ -1,20 +1,23 @@
+from __future__ import unicode_literals
 import re
 import json
 
 from .common import InfoExtractor
 from ..utils import unescapeHTML
 
+
 class OoyalaIE(InfoExtractor):
     _VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P<id>.+?)(&|$)'
 
     _TEST = {
         # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
-        u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
-        u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4',
-        u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c',
-        u'info_dict': {
-            u'title': u'Explaining Data Recovery from Hard Drives and SSDs',
-            u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
+        'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+        'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
+        'info_dict': {
+            'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+            'ext': 'mp4',
+            'title': 'Explaining Data Recovery from Hard Drives and SSDs',
+            'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
         },
     }
 
@@ -28,13 +31,14 @@ class OoyalaIE(InfoExtractor):
             ie=cls.ie_key())
 
     def _extract_result(self, info, more_info):
-        return {'id': info['embedCode'],
-                'ext': 'mp4',
-                'title': unescapeHTML(info['title']),
-                'url': info.get('ipad_url') or info['url'],
-                'description': unescapeHTML(more_info['description']),
-                'thumbnail': more_info['promo'],
-                }
+        return {
+            'id': info['embedCode'],
+            'ext': 'mp4',
+            'title': unescapeHTML(info['title']),
+            'url': info.get('ipad_url') or info['url'],
+            'description': unescapeHTML(more_info['description']),
+            'thumbnail': more_info['promo'],
+        }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -42,22 +46,23 @@ class OoyalaIE(InfoExtractor):
         player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode
         player = self._download_webpage(player_url, embedCode)
         mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
-                                        player, u'mobile player url')
+                                        player, 'mobile player url')
         mobile_player = self._download_webpage(mobile_url, embedCode)
         videos_info = self._search_regex(
             r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
-            mobile_player, u'info').replace('\\"','"')
-        videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"')
+            mobile_player, 'info').replace('\\"','"')
+        videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"','"')
         videos_info = json.loads(videos_info)
         videos_more_info =json.loads(videos_more_info)
 
         if videos_more_info.get('lineup'):
             videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
-            return {'_type': 'playlist',
-                    'id': embedCode,
-                    'title': unescapeHTML(videos_more_info['title']),
-                    'entries': videos,
-                    }
+            return {
+                '_type': 'playlist',
+                'id': embedCode,
+                'title': unescapeHTML(videos_more_info['title']),
+                'entries': videos,
+            }
         else:
             return self._extract_result(videos_info[0], videos_more_info)
         
diff --git a/youtube_dl/extractor/parliamentliveuk.py b/youtube_dl/extractor/parliamentliveuk.py
new file mode 100644 (file)
index 0000000..0a423a0
--- /dev/null
@@ -0,0 +1,53 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class ParliamentLiveUKIE(InfoExtractor):
+    IE_NAME = 'parliamentlive.tv'
+    IE_DESC = 'UK parliament videos'
+    _VALID_URL = r'https?://www\.parliamentlive\.tv/Main/Player\.aspx\?(?:[^&]+&)*?meetingId=(?P<id>[0-9]+)'
+
+    _TEST = {
+        'url': 'http://www.parliamentlive.tv/Main/Player.aspx?meetingId=15121&player=windowsmedia',
+        'info_dict': {
+            'id': '15121',
+            'ext': 'asf',
+            'title': 'hoc home affairs committee, 18 mar 2014.pm',
+            'description': 'md5:033b3acdf83304cd43946b2d5e5798d1',
+        },
+        'params': {
+            'skip_download': True,  # Requires mplayer (mms)
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        asx_url = self._html_search_regex(
+            r'embed.*?src="([^"]+)" name="MediaPlayer"', webpage,
+            'metadata URL')
+        asx = self._download_xml(asx_url, video_id, 'Downloading ASX metadata')
+        video_url = asx.find('.//REF').attrib['HREF']
+
+        title = self._search_regex(
+            r'''(?x)player\.setClipDetails\(
+                (?:(?:[0-9]+|"[^"]+"),\s*){2}
+                "([^"]+",\s*"[^"]+)"
+                ''',
+            webpage, 'title').replace('", "', ', ')
+        description = self._html_search_regex(
+            r'(?s)<span id="MainContentPlaceHolder_CaptionsBlock_WitnessInfo">(.*?)</span>',
+            webpage, 'description')
+
+        return {
+            'id': video_id,
+            'ext': 'asf',
+            'url': video_url,
+            'title': title,
+            'description': description,
+        }
index e7e0042fb4e39a77061976078d4662a9cc17f522..64cded70789249746a5e2b6604d86563a6ad499c 100644 (file)
@@ -3,6 +3,9 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import (
+    US_RATINGS,
+)
 
 
 class PBSIE(InfoExtractor):
@@ -13,7 +16,7 @@ class PBSIE(InfoExtractor):
             # Article with embedded player
            (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
            # Player
-           video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/
+           video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
         )
     '''
 
@@ -57,6 +60,11 @@ class PBSIE(InfoExtractor):
         info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
         info = self._download_json(info_url, display_id)
 
+        rating_str = info.get('rating')
+        if rating_str is not None:
+            rating_str = rating_str.rpartition('-')[2]
+        age_limit = US_RATINGS.get(rating_str)
+
         return {
             'id': video_id,
             'title': info['title'],
@@ -65,4 +73,5 @@ class PBSIE(InfoExtractor):
             'description': info['program'].get('description'),
             'thumbnail': info.get('image_url'),
             'duration': info.get('duration'),
+            'age_limit': age_limit,
         }
index 834fe7266c4e0bd4bc56bcd9807ec80c2cee7f05..7dd3dca0de94f41bb82c9baeacc45e5ab14ae0a2 100644 (file)
@@ -8,6 +8,7 @@ from ..utils import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urllib_parse,
+    str_to_int,
 )
 from ..aes import (
     aes_decrypt_text
@@ -27,6 +28,12 @@ class PornHubIE(InfoExtractor):
         }
     }
 
+    def _extract_count(self, pattern, webpage, name):
+        count = self._html_search_regex(pattern, webpage, '%s count' % name, fatal=False)
+        if count:
+            count = str_to_int(count)
+        return count
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('videoid')
@@ -37,11 +44,19 @@ class PornHubIE(InfoExtractor):
         webpage = self._download_webpage(req, video_id)
 
         video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
-        video_uploader = self._html_search_regex(r'<b>From: </b>(?:\s|<[^>]*>)*(.+?)<', webpage, 'uploader', fatal=False)
+        video_uploader = self._html_search_regex(
+            r'(?s)<div class="video-info-row">\s*From:&nbsp;.+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<',
+            webpage, 'uploader', fatal=False)
         thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
         if thumbnail:
             thumbnail = compat_urllib_parse.unquote(thumbnail)
 
+        view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
+        like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
+        dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
+        comment_count = self._extract_count(
+            r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
+
         video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
         if webpage.find('"encrypted":true') != -1:
             password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
@@ -77,6 +92,10 @@ class PornHubIE(InfoExtractor):
             'uploader': video_uploader,
             'title': video_title,
             'thumbnail': thumbnail,
+            'view_count': view_count,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'comment_count': comment_count,
             'formats': formats,
             'age_limit': 18,
         }
similarity index 50%
rename from youtube_dl/extractor/vgtrk.py
rename to youtube_dl/extractor/rutv.py
index 429b8bc728c2b615bbadd36bc6bebd45868e6547..5c38cbc02b7748b49daf3654291dfe5fe2b49a50 100644 (file)
@@ -10,33 +10,19 @@ from ..utils import (
 )
 
 
-class VGTRKIE(InfoExtractor):
-    IE_DESC = 'ВГТРК'
-    _VALID_URL = r'http://(?:.+?\.)?(?:vesti\.ru|russia2?\.tv|tvkultura\.ru|rutv\.ru)/(?P<id>.+)'
+class RUTVIE(InfoExtractor):
+    IE_DESC = 'RUTV.RU'
+    _VALID_URL = r'https?://player\.(?:rutv\.ru|vgtrk\.com)/(?:flash2v/container\.swf\?id=|iframe/(?P<type>swf|video|live)/id/)(?P<id>\d+)'
 
     _TESTS = [
         {
-            'url': 'http://www.vesti.ru/videos?vid=575582&cid=1',
+            'url': 'http://player.rutv.ru/flash2v/container.swf?id=774471&sid=kultura&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972347/video_id/978186/brand_id/31724',
             'info_dict': {
-                'id': '765035',
-                'ext': 'mp4',
-                'title': 'Вести.net: биткоины в России не являются законными',
-                'description': 'md5:d4bb3859dc1177b28a94c5014c35a36b',
-                'duration': 302,
-            },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'http://www.vesti.ru/doc.html?id=1349233',
-            'info_dict': {
-                'id': '773865',
+                'id': '774471',
                 'ext': 'mp4',
-                'title': 'УÑ\87аÑ\81Ñ\82ники Ð¼Ð¸Ñ\82инга Ñ\88Ñ\82Ñ\83Ñ\80мÑ\83Ñ\8eÑ\82 Ð\94онеÑ\86кÑ\83Ñ\8e Ð¾Ð±Ð»Ð°Ñ\81Ñ\82нÑ\83Ñ\8e Ð°Ð´Ð¼Ð¸Ð½Ð¸Ñ\81Ñ\82Ñ\80аÑ\86иÑ\8e',
-                'description': 'md5:1a160e98b3195379b4c849f2f4958009',
-                'duration': 210,
+                'title': 'Ð\9cонологи Ð½Ð° Ð²Ñ\81е Ð²Ñ\80емена',
+                'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5',
+                'duration': 2906,
             },
             'params': {
                 # m3u8 download
@@ -44,13 +30,13 @@ class VGTRKIE(InfoExtractor):
             },
         },
         {
-            'url': 'http://www.vesti.ru/only_video.html?vid=576180',
+            'url': 'https://player.vgtrk.com/flash2v/container.swf?id=774016&sid=russiatv&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972098/video_id/977760/brand_id/57638',
             'info_dict': {
-                'id': '766048',
+                'id': '774016',
                 'ext': 'mp4',
-                'title': 'СШÐ\90 Ð·Ð°Ð¼Ð¾Ñ\80озило, Ð\91Ñ\80иÑ\82аниÑ\8e Ð·Ð°Ñ\82опило',
-                'description': 'md5:f0ed0695ec05aed27c56a70a58dc4cc1',
-                'duration': 87,
+                'title': 'ЧÑ\83жой Ð² Ñ\81емÑ\8cе Ð¡Ñ\82алина',
+                'description': '',
+                'duration': 2539,
             },
             'params': {
                 # m3u8 download
@@ -58,7 +44,7 @@ class VGTRKIE(InfoExtractor):
             },
         },
         {
-            'url': 'http://hitech.vesti.ru/news/view/id/4000',
+            'url': 'http://player.rutv.ru/iframe/swf/id/766888/sid/hitech/?acc_video_id=4000',
             'info_dict': {
                 'id': '766888',
                 'ext': 'mp4',
@@ -72,22 +58,21 @@ class VGTRKIE(InfoExtractor):
             },
         },
         {
-            'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
+            'url': 'http://player.rutv.ru/iframe/video/id/771852/start_zoom/true/showZoomBtn/false/sid/russiatv/?acc_video_id=episode_id/970443/video_id/975648/brand_id/5169',
             'info_dict': {
-                'id': '766403',
+                'id': '771852',
                 'ext': 'mp4',
-                'title': 'XXII зимние Олимпийские игры. Российские хоккеисты стартовали на Олимпиаде с победы',
-                'description': 'md5:55805dfd35763a890ff50fa9e35e31b3',
-                'duration': 271,
+                'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет',
+                'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8',
+                'duration': 3096,
             },
             'params': {
                 # m3u8 download
                 'skip_download': True,
             },
-            'skip': 'Blocked outside Russia',
         },
         {
-            'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
+            'url': 'http://player.rutv.ru/iframe/live/id/51499/showZoomBtn/false/isPlay/true/sid/sochi2014',
             'info_dict': {
                 'id': '51499',
                 'ext': 'flv',
@@ -98,124 +83,44 @@ class VGTRKIE(InfoExtractor):
                 # rtmp download
                 'skip_download': True,
             },
-            'skip': 'Translation has finished'
-        },
-        {
-            'url': 'http://russia.tv/video/show/brand_id/5169/episode_id/970443/video_id/975648',
-            'info_dict': {
-                'id': '771852',
-                'ext': 'mp4',
-                'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет',
-                'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8',
-                'duration': 3096,
-            },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'http://russia.tv/brand/show/brand_id/57638',
-            'info_dict': {
-                'id': '774016',
-                'ext': 'mp4',
-                'title': 'Чужой в семье Сталина',
-                'description': '',
-                'duration': 2539,
-            },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'http://2.russia.tv/video/show/brand_id/48863/episode_id/972920/video_id/978667/viewtype/picture',
-            'info_dict': {
-                'id': '775081',
-                'ext': 'mp4',
-                'title': 'XXII зимние Олимпийские игры. Россияне заняли весь пьедестал в лыжных гонках',
-                'description': 'md5:15d3741dd8d04b203fbc031c6a47fb0f',
-                'duration': 101,
-            },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            },
-            'skip': 'Blocked outside Russia',
-        },
-        {
-            'url': 'http://tvkultura.ru/video/show/brand_id/31724/episode_id/972347/video_id/978186',
-            'info_dict': {
-                'id': '774471',
-                'ext': 'mp4',
-                'title': 'Монологи на все времена',
-                'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5',
-                'duration': 2906,
-            },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'http://rutv.ru/brand/show/id/6792/channel/75',
-            'info_dict': {
-                'id': '125521',
-                'ext': 'mp4',
-                'title': 'Грустная дама червей. Х/ф',
-                'description': '',
-                'duration': 4882,
-            },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            },
+            'skip': 'Translation has finished',
         },
     ]
 
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        page = self._download_webpage(url, video_id, 'Downloading page')
-
+    @classmethod
+    def _extract_url(cls, webpage):
         mobj = re.search(
-            r'<meta property="og:video" content="http://www\.vesti\.ru/i/flvplayer_videoHost\.swf\?vid=(?P<id>\d+)',
-            page)
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.rutv\.ru/iframe/(?:swf|video|live)/id/.+?)\1', webpage)
         if mobj:
-            video_id = mobj.group('id')
-            page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id,
-                'Downloading video page')
+            return mobj.group('url')
 
         mobj = re.search(
-            r'<meta property="og:video" content="http://player\.rutv\.ru/flash2v/container\.swf\?id=(?P<id>\d+)', page)
+            r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>http://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)',
+            webpage)
         if mobj:
-            video_type = 'video'
-            video_id = mobj.group('id')
-        else:
-            mobj = re.search(
-                r'<iframe.+?src="http://player\.rutv\.ru/iframe/(?P<type>[^/]+)/id/(?P<id>\d+)[^"]*".*?></iframe>',
-                page)
+            return mobj.group('url')
 
-            if not mobj:
-                raise ExtractorError('No media found', expected=True)
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        video_type = mobj.group('type')
 
-            video_type = mobj.group('type')
-            video_id = mobj.group('id')
+        if not video_type or video_type == 'swf':
+            video_type = 'video'
 
         json_data = self._download_json(
             'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id),
             video_id, 'Downloading JSON')
 
         if json_data['errors']:
-            raise ExtractorError('vesti returned error: %s' % json_data['errors'], expected=True)
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, json_data['errors']), expected=True)
 
         playlist = json_data['data']['playlist']
         medialist = playlist['medialist']
         media = medialist[0]
 
         if media['errors']:
-            raise ExtractorError('vesti returned error: %s' % media['errors'], expected=True)
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, media['errors']), expected=True)
 
         view_count = playlist.get('count_views')
         priority_transport = playlist['priority_transport']
index cf10be2d02cb5bc538e2561eee2c3fb5e47643c6..ad1a46c3385713056b94d2f00e38558e8ea69b1c 100644 (file)
@@ -11,7 +11,9 @@ from ..utils import (
 
 
 class TEDIE(SubtitlesInfoExtractor):
-    _VALID_URL = r'''(?x)http://www\.ted\.com/
+    _VALID_URL = r'''(?x)
+        (?P<proto>https?://)
+        (?P<type>www|embed)(?P<urlmain>\.ted\.com/
         (
             (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
             |
@@ -19,6 +21,7 @@ class TEDIE(SubtitlesInfoExtractor):
         )
         (/lang/(.*?))? # The url may contain the language
         /(?P<name>\w+) # Here goes the name and then ".html"
+        .*)$
         '''
     _TEST = {
         'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
@@ -48,6 +51,9 @@ class TEDIE(SubtitlesInfoExtractor):
 
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url, re.VERBOSE)
+        if m.group('type') == 'embed':
+            desktop_url = m.group('proto') + 'www' + m.group('urlmain')
+            return self.url_result(desktop_url, 'TED')
         name = m.group('name')
         if m.group('type_talk'):
             return self._talk_info(url, name)
@@ -93,11 +99,14 @@ class TEDIE(SubtitlesInfoExtractor):
             self._list_available_subtitles(video_id, talk_info)
             return
 
+        thumbnail = talk_info['thumb']
+        if not thumbnail.startswith('http'):
+            thumbnail = 'http://' + thumbnail
         return {
             'id': video_id,
             'title': talk_info['title'],
             'uploader': talk_info['speaker'],
-            'thumbnail': talk_info['thumb'],
+            'thumbnail': thumbnail,
             'description': self._og_search_description(webpage),
             'subtitles': video_subtitles,
             'formats': formats,
diff --git a/youtube_dl/extractor/toypics.py b/youtube_dl/extractor/toypics.py
new file mode 100644 (file)
index 0000000..34008af
--- /dev/null
@@ -0,0 +1,75 @@
+from .common import InfoExtractor
+import re
+
+
+class ToypicsIE(InfoExtractor):
+    IE_DESC = 'Toypics user profile'
+    _VALID_URL = r'http://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*'
+    _TEST = {
+        'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/',
+        'md5': '16e806ad6d6f58079d210fe30985e08b',
+        'info_dict': {
+            'id': '514',
+            'ext': 'mp4',
+            'title': 'Chance-Bulge\'d, 2',
+            'age_limit': 18,
+            'uploader': 'kidsune',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        page = self._download_webpage(url, video_id)
+        video_url = self._html_search_regex(
+            r'src:\s+"(http://static[0-9]+\.toypics\.net/flvideo/[^"]+)"', page, 'video URL')
+        title = self._html_search_regex(
+            r'<title>Toypics - ([^<]+)</title>', page, 'title')
+        username = self._html_search_regex(
+            r'toypics.net/([^/"]+)" class="user-name">', page, 'username')
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'uploader': username,
+            'age_limit': 18,
+        }
+
+
+class ToypicsUserIE(InfoExtractor):
+    IE_DESC = 'Toypics user profile'
+    _VALID_URL = r'http://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        username = mobj.group('username')
+
+        profile_page = self._download_webpage(
+            url, username, note='Retrieving profile page')
+
+        video_count = int(self._search_regex(
+            r'public/">Public Videos \(([0-9]+)\)</a></li>', profile_page,
+            'video count'))
+
+        PAGE_SIZE = 8
+        urls = []
+        page_count = (video_count + PAGE_SIZE + 1) // PAGE_SIZE
+        for n in range(1, page_count + 1):
+            lpage_url = url + '/public/%d' % n
+            lpage = self._download_webpage(
+                lpage_url, username,
+                note='Downloading page %d/%d' % (n, page_count))
+            urls.extend(
+                re.findall(
+                    r'<p class="video-entry-title">\n\s*<a href="(http://videos.toypics.net/view/[^"]+)">',
+                    lpage))
+
+        return {
+            '_type': 'playlist',
+            'id': username,
+            'entries': [{
+                '_type': 'url',
+                'url': eurl,
+                'ie_key': 'Toypics',
+            } for eurl in urls]
+        }
index 35df918b879ced4b7f2d7e90f1e21d15479504ca..054f427252341306edd7698e6d150bcb619b64fc 100644 (file)
@@ -16,7 +16,7 @@ class UdemyIE(InfoExtractor):
     _LOGIN_URL = 'https://www.udemy.com/join/login-submit/'
     _NETRC_MACHINE = 'udemy'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'https://www.udemy.com/java-tutorial/#/lecture/172757',
         'md5': '98eda5b657e752cf945d8445e261b5c5',
         'info_dict': {
@@ -27,7 +27,7 @@ class UdemyIE(InfoExtractor):
             'duration': 579.29,
         },
         'skip': 'Requires udemy account credentials',
-    }
+    }]
 
     def _handle_error(self, response):
         if not isinstance(response, dict):
@@ -129,6 +129,7 @@ class UdemyCourseIE(UdemyIE):
     _VALID_URL = r'https?://www\.udemy\.com/(?P<coursepath>[\da-z-]+)'
     _SUCCESSFULLY_ENROLLED = '>You have enrolled in this course!<'
     _ALREADY_ENROLLED = '>You are already taking this course.<'
+    _TESTS = []
 
     @classmethod
     def suitable(cls, url):
diff --git a/youtube_dl/extractor/vesti.py b/youtube_dl/extractor/vesti.py
new file mode 100644 (file)
index 0000000..27f9acb
--- /dev/null
@@ -0,0 +1,121 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+from .rutv import RUTVIE
+
+
+class VestiIE(InfoExtractor):
+    IE_DESC = 'Вести.Ru'
+    _VALID_URL = r'http://(?:.+?\.)?vesti\.ru/(?P<id>.+)'
+
+    _TESTS = [
+        {
+            'url': 'http://www.vesti.ru/videos?vid=575582&cid=1',
+            'info_dict': {
+                'id': '765035',
+                'ext': 'mp4',
+                'title': 'Вести.net: биткоины в России не являются законными',
+                'description': 'md5:d4bb3859dc1177b28a94c5014c35a36b',
+                'duration': 302,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.vesti.ru/doc.html?id=1349233',
+            'info_dict': {
+                'id': '773865',
+                'ext': 'mp4',
+                'title': 'Участники митинга штурмуют Донецкую областную администрацию',
+                'description': 'md5:1a160e98b3195379b4c849f2f4958009',
+                'duration': 210,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.vesti.ru/only_video.html?vid=576180',
+            'info_dict': {
+                'id': '766048',
+                'ext': 'mp4',
+                'title': 'США заморозило, Британию затопило',
+                'description': 'md5:f0ed0695ec05aed27c56a70a58dc4cc1',
+                'duration': 87,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://hitech.vesti.ru/news/view/id/4000',
+            'info_dict': {
+                'id': '766888',
+                'ext': 'mp4',
+                'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
+                'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
+                'duration': 279,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
+            'info_dict': {
+                'id': '766403',
+                'ext': 'mp4',
+                'title': 'XXII зимние Олимпийские игры. Российские хоккеисты стартовали на Олимпиаде с победы',
+                'description': 'md5:55805dfd35763a890ff50fa9e35e31b3',
+                'duration': 271,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+            'skip': 'Blocked outside Russia',
+        },
+        {
+            'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
+            'info_dict': {
+                'id': '51499',
+                'ext': 'flv',
+                'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
+                'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+            'skip': 'Translation has finished'
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        page = self._download_webpage(url, video_id, 'Downloading page')
+
+        mobj = re.search(
+            r'<meta[^>]+?property="og:video"[^>]+?content="http://www\.vesti\.ru/i/flvplayer_videoHost\.swf\?vid=(?P<id>\d+)',
+            page)
+        if mobj:
+            video_id = mobj.group('id')
+            page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id,
+                'Downloading video page')
+
+        rutv_url = RUTVIE._extract_url(page)
+        if rutv_url:
+            return self.url_result(rutv_url, 'RUTV')
+
+        raise ExtractorError('No video found', expected=True)
\ No newline at end of file
index c3360f16676183b868eeb495b5552e9a459477c3..ee47c30bab9bd37a5d8e75109f29108864db68d0 100644 (file)
@@ -21,6 +21,7 @@ class VevoIE(InfoExtractor):
            https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
            vevo:)
         (?P<id>[^&?#]+)'''
+
     _TESTS = [{
         'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
         "md5": "06bea460acb744eab74a9d7dcb4bfd61",
@@ -33,7 +34,8 @@ class VevoIE(InfoExtractor):
             "duration": 230.12,
             "width": 1920,
             "height": 1080,
-            'timestamp': 1372057200,
+            # timestamp and upload_date are often incorrect; seem to change randomly
+            'timestamp': int,
         }
     }, {
         'note': 'v3 SMIL format',
@@ -47,7 +49,7 @@ class VevoIE(InfoExtractor):
             'title': 'I Wish I Could Break Your Heart',
             'duration': 226.101,
             'age_limit': 0,
-            'timestamp': 1392796919,
+            'timestamp': int,
         }
     }, {
         'note': 'Age-limited video',
@@ -58,7 +60,6 @@ class VevoIE(InfoExtractor):
             'age_limit': 18,
             'title': 'Tunnel Vision (Explicit)',
             'uploader': 'Justin Timberlake',
-            # timestamp and upload_date are often incorrect; seem to change randomly
             'upload_date': 're:2013070[34]',
             'timestamp': int,
         },
diff --git a/youtube_dl/extractor/videolecturesnet.py b/youtube_dl/extractor/videolecturesnet.py
new file mode 100644 (file)
index 0000000..ebd2a3d
--- /dev/null
@@ -0,0 +1,70 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    find_xpath_attr,
+    int_or_none,
+    parse_duration,
+    unified_strdate,
+)
+
+
+class VideoLecturesNetIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/'
+    IE_NAME = 'videolectures.net'
+
+    _TEST = {
+        'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/',
+        'info_dict': {
+            'id': 'promogram_igor_mekjavic_eng',
+            'ext': 'mp4',
+            'title': 'Automatics, robotics and biocybernetics',
+            'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
+            'upload_date': '20130627',
+            'duration': 565,
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        smil_url = 'http://videolectures.net/%s/video/1/smil.xml' % video_id
+        smil = self._download_xml(smil_url, video_id)
+
+        title = find_xpath_attr(smil, './/meta', 'name', 'title').attrib['content']
+        description_el = find_xpath_attr(smil, './/meta', 'name', 'abstract')
+        description = (
+            None if description_el is None
+            else description_el.attrib['content'])
+        upload_date = unified_strdate(
+            find_xpath_attr(smil, './/meta', 'name', 'date').attrib['content'])
+
+        switch = smil.find('.//switch')
+        duration = parse_duration(switch.attrib.get('dur'))
+        thumbnail_el = find_xpath_attr(switch, './image', 'type', 'thumbnail')
+        thumbnail = (
+            None if thumbnail_el is None else thumbnail_el.attrib.get('src'))
+
+        formats = [{
+            'url': v.attrib['src'],
+            'width': int_or_none(v.attrib.get('width')),
+            'height': int_or_none(v.attrib.get('height')),
+            'filesize': int_or_none(v.attrib.get('size')),
+            'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0,
+            'ext': v.attrib.get('ext'),
+        } for v in switch.findall('./video')
+            if v.attrib.get('proto') == 'http']
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'upload_date': upload_date,
+            'duration': duration,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
index 2206a06d59f57093f59135f6faa8d68381695a95..15f31529822bcba124cfb12bcb9e56566b3bfba7 100644 (file)
@@ -1,29 +1,33 @@
+from __future__ import unicode_literals
+
 import re
 
 from ..utils import (
     ExtractorError,
     unescapeHTML,
     unified_strdate,
+    US_RATINGS,
 )
 from .subtitles import SubtitlesInfoExtractor
 
 
 class VikiIE(SubtitlesInfoExtractor):
-    IE_NAME = u'viki'
+    IE_NAME = 'viki'
 
     _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
     _TEST = {
-        u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
-        u'file': u'1023585v.mp4',
-        u'md5': u'a21454021c2646f5433514177e2caa5f',
-        u'info_dict': {
-            u'title': u'Heirs Episode 14',
-            u'uploader': u'SBS',
-            u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
-            u'upload_date': u'20131121',
-            u'age_limit': 13,
+        'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
+        'md5': 'a21454021c2646f5433514177e2caa5f',
+        'info_dict': {
+            'id': '1023585v',
+            'ext': 'mp4',
+            'title': 'Heirs Episode 14',
+            'uploader': 'SBS',
+            'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
+            'upload_date': '20131121',
+            'age_limit': 13,
         },
-        u'skip': u'Blocked in the US',
+        'skip': 'Blocked in the US',
     }
 
     def _real_extract(self, url):
@@ -44,28 +48,21 @@ class VikiIE(SubtitlesInfoExtractor):
 
         rating_str = self._html_search_regex(
             r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
-            u'rating information', default='').strip()
-        RATINGS = {
-            'G': 0,
-            'PG': 10,
-            'PG-13': 13,
-            'R': 16,
-            'NC': 18,
-        }
-        age_limit = RATINGS.get(rating_str)
+            'rating information', default='').strip()
+        age_limit = US_RATINGS.get(rating_str)
 
         info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
         info_webpage = self._download_webpage(
-            info_url, video_id, note=u'Downloading info page')
+            info_url, video_id, note='Downloading info page')
         if re.match(r'\s*<div\s+class="video-error', info_webpage):
             raise ExtractorError(
-                u'Video %s is blocked from your location.' % video_id,
+                'Video %s is blocked from your location.' % video_id,
                 expected=True)
         video_url = self._html_search_regex(
-            r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')
+            r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL')
 
         upload_date_str = self._html_search_regex(
-            r'"created_at":"([^"]+)"', info_webpage, u'upload date')
+            r'"created_at":"([^"]+)"', info_webpage, 'upload date')
         upload_date = (
             unified_strdate(upload_date_str)
             if upload_date_str is not None
index fc9237a3f2eb2b456302659b1954c2715e6feb35..4e89acd81bbb5ddfb97b1da3381b1b9f25873c96 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
@@ -7,14 +9,14 @@ class WorldStarHipHopIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
     _TEST = {
         "url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO",
-        "file": "wshh6a7q1ny0G34ZwuIO.mp4",
         "md5": "9d04de741161603bf7071bbf4e883186",
         "info_dict": {
+            "id": "wshh6a7q1ny0G34ZwuIO",
+            "ext": "mp4",
             "title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
         }
     }
 
-
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
         video_id = m.group('id')
@@ -23,41 +25,32 @@ class WorldStarHipHopIE(InfoExtractor):
 
         m_vevo_id = re.search(r'videoId=(.*?)&amp?',
                               webpage_src)
-
         if m_vevo_id is not None:
-            self.to_screen(u'Vevo video detected:')
             return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
 
-        video_url = self._search_regex(r'so\.addVariable\("file","(.*?)"\)',
-            webpage_src, u'video URL')
+        video_url = self._search_regex(
+            r'so\.addVariable\("file","(.*?)"\)', webpage_src, 'video URL')
 
         if 'youtube' in video_url:
-            self.to_screen(u'Youtube video detected:')
             return self.url_result(video_url, ie='Youtube')
 
-        if 'mp4' in video_url:
-            ext = 'mp4'
-        else:
-            ext = 'flv'
-
-        video_title = self._html_search_regex(r"<title>(.*)</title>",
-            webpage_src, u'title')
+        video_title = self._html_search_regex(
+            r"<title>(.*)</title>", webpage_src, 'title')
 
         # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
-        thumbnail = self._html_search_regex(r'rel="image_src" href="(.*)" />',
-            webpage_src, u'thumbnail', fatal=False)
-
+        thumbnail = self._html_search_regex(
+            r'rel="image_src" href="(.*)" />', webpage_src, 'thumbnail',
+            fatal=False)
         if not thumbnail:
             _title = r"""candytitles.*>(.*)</span>"""
             mobj = re.search(_title, webpage_src)
             if mobj is not None:
                 video_title = mobj.group(1)
 
-        results = [{
-                    'id': video_id,
-                    'url' : video_url,
-                    'title' : video_title,
-                    'thumbnail' : thumbnail,
-                    'ext' : ext,
-                    }]
-        return results
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': video_title,
+            'thumbnail': thumbnail,
+        }
+
diff --git a/youtube_dl/extractor/xbef.py b/youtube_dl/extractor/xbef.py
new file mode 100644 (file)
index 0000000..71bd7c4
--- /dev/null
@@ -0,0 +1,50 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+)
+
+
+class XBefIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?xbef\.com/video/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://xbef.com/video/5119-glamourous-lesbians-smoking-drinking-and-fucking',
+        'md5': 'a478b565baff61634a98f5e5338be995',
+        'info_dict': {
+            'id': '5119',
+            'ext': 'mp4',
+            'title': 'md5:7358a9faef8b7b57acda7c04816f170e',
+            'age_limit': 18,
+            'thumbnail': 're:^http://.*\.jpg',
+        }
+    }
+
+    def _real_extract(self, url):
+        m = re.match(self._VALID_URL, url)
+        video_id = m.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._html_search_regex(
+            r'<h1[^>]*>(.*?)</h1>', webpage, 'title')
+
+        config_url_enc = self._download_webpage(
+            'http://xbef.com/Main/GetVideoURLEncoded/%s' % video_id, video_id,
+            note='Retrieving config URL')
+        config_url = compat_urllib_parse.unquote(config_url_enc)
+        config = self._download_xml(
+            config_url, video_id, note='Retrieving config')
+
+        video_url = config.find('./file').text
+        thumbnail = config.find('./image').text
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'thumbnail': thumbnail,
+            'age_limit': 18,
+        }
+
index d3eefd086720a82aa5f78c52d941fc6aee35c655..b293e2665b81b9a486bff2ec91b410da4a6d9998 100644 (file)
@@ -1,11 +1,10 @@
 from __future__ import unicode_literals
 
-import os
 import re
+import json
 
 from .common import InfoExtractor
 from ..utils import (
-    compat_urllib_parse_urlparse,
     compat_urllib_request,
     parse_duration,
     str_to_int,
@@ -42,7 +41,6 @@ class XTubeIE(InfoExtractor):
             r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False)
         video_description = self._html_search_regex(
             r'<p class="fieldsDesc">([^<]+)', webpage, 'description', fatal=False)
-        video_url = self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/')
         duration = parse_duration(self._html_search_regex(
             r'<span class="bold">Runtime:</span> ([^<]+)</p>', webpage, 'duration', fatal=False))
         view_count = self._html_search_regex(
@@ -54,12 +52,18 @@ class XTubeIE(InfoExtractor):
         if comment_count:
             comment_count = str_to_int(comment_count)
 
-        path = compat_urllib_parse_urlparse(video_url).path
-        extension = os.path.splitext(path)[1][1:]
-        format = path.split('/')[5].split('_')[:2]
-        format[0] += 'p'
-        format[1] += 'k'
-        format = "-".join(format)
+        player_quality_option = json.loads(self._html_search_regex(
+            r'playerQualityOption = ({.+?});', webpage, 'player quality option'))
+
+        QUALITIES = ['3gp', 'mp4_normal', 'mp4_high', 'flv', 'mp4_ultra', 'mp4_720', 'mp4_1080']
+        formats = [
+            {
+                'url': furl,
+                'format_id': format_id,
+                'preference': QUALITIES.index(format_id) if format_id in QUALITIES else -1,
+            } for format_id, furl in player_quality_option.items()
+        ]
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
@@ -69,9 +73,42 @@ class XTubeIE(InfoExtractor):
             'duration': duration,
             'view_count': view_count,
             'comment_count': comment_count,
-            'url': video_url,
-            'ext': extension,
-            'format': format,
-            'format_id': format,
+            'formats': formats,
             'age_limit': 18,
         }
+
+class XTubeUserIE(InfoExtractor):
+    IE_DESC = 'XTube user profile'
+    _VALID_URL = r'https?://(?:www\.)?xtube\.com/community/profile\.php\?(.*?)user=(?P<username>[^&#]+)(?:$|[&#])'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        username = mobj.group('username')
+
+        profile_page = self._download_webpage(
+            url, username, note='Retrieving profile page')
+
+        video_count = int(self._search_regex(
+            r'<strong>%s\'s Videos \(([0-9]+)\)</strong>'%username, profile_page,
+            'video count'))
+
+        PAGE_SIZE = 25
+        urls = []
+        page_count = (video_count + PAGE_SIZE + 1) // PAGE_SIZE
+        for n in range(1, page_count + 1):
+            lpage_url = 'http://www.xtube.com/user_videos.php?page=%d&u=%s' % (n, username)
+            lpage = self._download_webpage(
+                lpage_url, username,
+                note='Downloading page %d/%d' % (n, page_count))
+            urls.extend(
+                re.findall(r'addthis:url="([^"]+)"', lpage))
+
+        return {
+            '_type': 'playlist',
+            'id': username,
+            'entries': [{
+                '_type': 'url',
+                'url': eurl,
+                'ie_key': 'XTube',
+            } for eurl in urls]
+        }
index 77ad423c44b38af655fc14a8918dfbcf677ca936..d456c4da522d689ac7bcbd33c5f8a3b1204c3b00 100644 (file)
@@ -1,3 +1,6 @@
+from __future__ import unicode_literals
+
+
 import json
 import re
 import sys
@@ -17,24 +20,25 @@ from ..aes import (
 
 
 class YouPornIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
+    _VALID_URL = r'^(?P<proto>https?://)(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
     _TEST = {
-        u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
-        u'file': u'505835.mp4',
-        u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
-        u'info_dict': {
-            u"upload_date": u"20101221",
-            u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
-            u"uploader": u"Ask Dan And Jennifer",
-            u"title": u"Sex Ed: Is It Safe To Masturbate Daily?",
-            u"age_limit": 18,
+        'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
+        'md5': '71ec5fcfddacf80f495efa8b6a8d9a89',
+        'info_dict': {
+            'id': '505835',
+            'ext': 'mp4',
+            'upload_date': '20101221',
+            'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
+            'uploader': 'Ask Dan And Jennifer',
+            'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
+            'age_limit': 18,
         }
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('videoid')
-        url = 'http://www.' + mobj.group('url')
+        url = mobj.group('proto') + 'www.' + mobj.group('url')
 
         req = compat_urllib_request.Request(url)
         req.add_header('Cookie', 'age_verified=1')
@@ -42,7 +46,7 @@ class YouPornIE(InfoExtractor):
         age_limit = self._rta_search(webpage)
 
         # Get JSON parameters
-        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
+        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, 'JSON parameters')
         try:
             params = json.loads(json_params)
         except:
@@ -61,7 +65,7 @@ class YouPornIE(InfoExtractor):
         # Get all of the links from the page
         DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
         download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
-            webpage, u'download list').strip()
+            webpage, 'download list').strip()
         LINK_RE = r'<a href="([^"]+)">'
         links = re.findall(LINK_RE, download_list_html)
 
@@ -86,7 +90,7 @@ class YouPornIE(InfoExtractor):
             resolution = format_parts[0]
             height = int(resolution[:-len('p')])
             bitrate = int(format_parts[1][:-len('k')])
-            format = u'-'.join(format_parts) + u'-' + dn
+            format = '-'.join(format_parts) + '-' + dn
 
             formats.append({
                 'url': video_url,
index f7cb497a87071698bb5361c9695c121a9693f1d0..3a3a5a39e4bfcdce1a3bee26fee3e43326d62d90 100644 (file)
@@ -176,32 +176,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
 
         # 3d videos
-        '82': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': '3D', 'preference': -20},
-        '83': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': '3D', 'preference': -20},
-        '84': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': '3D', 'preference': -20},
-        '85': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': '3D', 'preference': -20},
-        '100': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': '3D', 'preference': -20},
-        '101': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': '3D', 'preference': -20},
-        '102': {'ext': 'webm', 'height': 720, 'resolution': '720p', 'format_note': '3D', 'preference': -20},
+        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
+        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
+        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
+        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
+        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
+        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
+        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
 
         # Apple HTTP Live Streaming
-        '92': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'HLS', 'preference': -10},
-        '93': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'HLS', 'preference': -10},
-        '94': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'HLS', 'preference': -10},
-        '95': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'HLS', 'preference': -10},
-        '96': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'HLS', 'preference': -10},
-        '132': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'HLS', 'preference': -10},
-        '151': {'ext': 'mp4', 'height': 72, 'resolution': '72p', 'format_note': 'HLS', 'preference': -10},
+        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
+        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
+        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
+        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
+        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
+        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
+        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
 
         # DASH mp4 video
-        '133': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
-        '134': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
-        '135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
-        '136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
-        '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
-        '138': {'ext': 'mp4', 'height': 2160, 'resolution': '2160p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
-        '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
-        '264': {'ext': 'mp4', 'height': 1440, 'resolution': '1440p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 
         # Dash mp4 audio
         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
@@ -215,13 +215,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
-        '242': {'ext': 'webm', 'height': 240, 'resolution': '240p', 'format_note': 'DASH webm', 'preference': -40},
-        '243': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': 'DASH webm', 'preference': -40},
-        '244': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
-        '245': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
-        '246': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
-        '247': {'ext': 'webm', 'height': 720, 'resolution': '720p', 'format_note': 'DASH webm', 'preference': -40},
-        '248': {'ext': 'webm', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH webm', 'preference': -40},
+        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH webm', 'preference': -40},
+        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH webm', 'preference': -40},
+        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
+        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
+        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
+        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH webm', 'preference': -40},
+        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH webm', 'preference': -40},
 
         # Dash webm audio
         '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 48, 'preference': -50},
@@ -1130,14 +1130,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
 
     def _real_extract(self, url):
+        proto = (
+            u'http' if self._downloader.params.get('prefer_insecure', False)
+            else u'https')
+
         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
         mobj = re.search(self._NEXT_URL_RE, url)
         if mobj:
-            url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
+            url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
         video_id = self.extract_id(url)
 
         # Get video webpage
-        url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
+        url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
         video_webpage = self._download_webpage(url, video_id)
 
         # Attempt to extract SWF player URL
@@ -1162,7 +1166,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                                                   'asv': 3,
                                                   'sts':'1588',
                                                   })
-            video_info_url = 'https://www.youtube.com/get_video_info?' + data
+            video_info_url = proto + '://www.youtube.com/get_video_info?' + data
             video_info_webpage = self._download_webpage(video_info_url, video_id,
                                     note=False,
                                     errnote='unable to download video info webpage')
@@ -1170,7 +1174,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         else:
             age_gate = False
             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
-                video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
+                video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                         % (video_id, el_type))
                 video_info_webpage = self._download_webpage(video_info_url, video_id,
                                         note=False,
@@ -1445,7 +1449,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             'duration':     video_duration,
             'age_limit':    18 if age_gate else 0,
             'annotations':  video_annotations,
-            'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
+            'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
             'view_count':   view_count,
             'like_count': like_count,
             'dislike_count': dislike_count,
index 3cf29e63a36a9210fefbd2c64e46839fb9434017..8b359cb77f9188baa9c0a1fcea322268992bd90d 100644 (file)
@@ -6,6 +6,7 @@ import ctypes
 import datetime
 import email.utils
 import errno
+import getpass
 import gzip
 import itertools
 import io
@@ -762,6 +763,10 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 
 def unified_strdate(date_str):
     """Return a string with the date in the format YYYYMMDD"""
+
+    if date_str is None:
+        return None
+
     upload_date = None
     #Replace commas
     date_str = date_str.replace(',', ' ')
@@ -1121,11 +1126,11 @@ def setproctitle(title):
         libc = ctypes.cdll.LoadLibrary("libc.so.6")
     except OSError:
         return
-    title = title
-    buf = ctypes.create_string_buffer(len(title) + 1)
-    buf.value = title.encode('utf-8')
+    title_bytes = title.encode('utf-8')
+    buf = ctypes.create_string_buffer(len(title_bytes))
+    buf.value = title_bytes
     try:
-        libc.prctl(15, ctypes.byref(buf), 0, 0, 0)
+        libc.prctl(15, buf, 0, 0, 0)
     except AttributeError:
         return  # Strange libc, just skip this
 
@@ -1279,3 +1284,21 @@ def parse_xml(s):
     parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
     kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
     return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
+
+
+if sys.version_info < (3, 0) and sys.platform == 'win32':
+    def compat_getpass(prompt, *args, **kwargs):
+        if isinstance(prompt, compat_str):
+            prompt = prompt.encode(preferredencoding())
+        return getpass.getpass(prompt, *args, **kwargs)
+else:
+    compat_getpass = getpass.getpass
+
+
+US_RATINGS = {
+    'G': 0,
+    'PG': 10,
+    'PG-13': 13,
+    'R': 16,
+    'NC': 18,
+}
index c038225f70e9785948e28691de69d7e9595cbfb0..27911ab537560d7ed407e64e4e78178e1cc67532 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2014.03.12'
+__version__ = '2014.03.23'