5e0a9e10cb56d3a906102e3dced5da98a09c59ec
[ytdl] / youtube_dl / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5
6 import itertools
7 import json
8 import os.path
9 import random
10 import re
11 import time
12 import traceback
13
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
18     compat_chr,
19     compat_kwargs,
20     compat_parse_qs,
21     compat_urllib_parse_unquote,
22     compat_urllib_parse_unquote_plus,
23     compat_urllib_parse_urlencode,
24     compat_urllib_parse_urlparse,
25     compat_urlparse,
26     compat_str,
27 )
28 from ..utils import (
29     clean_html,
30     error_to_compat_str,
31     ExtractorError,
32     float_or_none,
33     get_element_by_attribute,
34     get_element_by_id,
35     int_or_none,
36     mimetype2ext,
37     orderedSet,
38     parse_codecs,
39     parse_duration,
40     qualities,
41     remove_quotes,
42     remove_start,
43     smuggle_url,
44     str_or_none,
45     str_to_int,
46     try_get,
47     unescapeHTML,
48     unified_strdate,
49     unsmuggle_url,
50     uppercase_escape,
51     url_or_none,
52     urlencode_postdata,
53 )
54
55
56 class YoutubeBaseInfoExtractor(InfoExtractor):
57     """Provide base functions for Youtube extractors"""
    # Page fetched first by _login(); its hidden inputs seed every later request
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    # NOTE(review): not referenced by any code visible in this part of the file
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Endpoints used by _login(), in the order they are requested:
    # account lookup -> password challenge -> (optional) TFA code submission
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the TL value extracted from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # presumably the machine name looked up in .netrc by _get_login_info() - confirm in InfoExtractor
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Playlist ID pattern; interpolated into YoutubeIE._VALID_URL as %(playlist_id)s
    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
70
71     def _set_language(self):
72         self._set_cookie(
73             '.youtube.com', 'PREF', 'f1=50000000&hl=en',
74             # YouTube sets the expire time to about two months
75             expire_time=time.time() + 2 * 30 * 24 * 3600)
76
77     def _ids_to_results(self, ids):
78         return [
79             self.url_result(vid_id, 'Youtube', video_id=vid_id)
80             for vid_id in ids]
81
82     def _login(self):
83         """
84         Attempt to log in to YouTube.
85         True is returned if successful or skipped.
86         False is returned if login failed.
87
88         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
89         """
90         username, password = self._get_login_info()
91         # No authentication to be performed
92         if username is None:
93             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
94                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
95             return True
96
97         login_page = self._download_webpage(
98             self._LOGIN_URL, None,
99             note='Downloading login page',
100             errnote='unable to fetch login page', fatal=False)
101         if login_page is False:
102             return
103
104         login_form = self._hidden_inputs(login_page)
105
106         def req(url, f_req, note, errnote):
107             data = login_form.copy()
108             data.update({
109                 'pstMsg': 1,
110                 'checkConnection': 'youtube',
111                 'checkedDomains': 'youtube',
112                 'hl': 'en',
113                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
114                 'f.req': json.dumps(f_req),
115                 'flowName': 'GlifWebSignIn',
116                 'flowEntry': 'ServiceLogin',
117             })
118             return self._download_json(
119                 url, None, note=note, errnote=errnote,
120                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
121                 fatal=False,
122                 data=urlencode_postdata(data), headers={
123                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
124                     'Google-Accounts-XSRF': 1,
125                 })
126
127         def warn(message):
128             self._downloader.report_warning(message)
129
130         lookup_req = [
131             username,
132             None, [], None, 'US', None, None, 2, False, True,
133             [
134                 None, None,
135                 [2, 1, None, 1,
136                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
137                  None, [], 4],
138                 1, [None, None, []], None, None, None, True
139             ],
140             username,
141         ]
142
143         lookup_results = req(
144             self._LOOKUP_URL, lookup_req,
145             'Looking up account info', 'Unable to look up account info')
146
147         if lookup_results is False:
148             return False
149
150         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
151         if not user_hash:
152             warn('Unable to extract user hash')
153             return False
154
155         challenge_req = [
156             user_hash,
157             None, 1, None, [1, None, None, None, [password, None, True]],
158             [
159                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
160                 1, [None, None, []], None, None, None, True
161             ]]
162
163         challenge_results = req(
164             self._CHALLENGE_URL, challenge_req,
165             'Logging in', 'Unable to log in')
166
167         if challenge_results is False:
168             return
169
170         login_res = try_get(challenge_results, lambda x: x[0][5], list)
171         if login_res:
172             login_msg = try_get(login_res, lambda x: x[5], compat_str)
173             warn(
174                 'Unable to login: %s' % 'Invalid password'
175                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
176             return False
177
178         res = try_get(challenge_results, lambda x: x[0][-1], list)
179         if not res:
180             warn('Unable to extract result entry')
181             return False
182
183         login_challenge = try_get(res, lambda x: x[0][0], list)
184         if login_challenge:
185             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
186             if challenge_str == 'TWO_STEP_VERIFICATION':
187                 # SEND_SUCCESS - TFA code has been successfully sent to phone
188                 # QUOTA_EXCEEDED - reached the limit of TFA codes
189                 status = try_get(login_challenge, lambda x: x[5], compat_str)
190                 if status == 'QUOTA_EXCEEDED':
191                     warn('Exceeded the limit of TFA codes, try later')
192                     return False
193
194                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
195                 if not tl:
196                     warn('Unable to extract TL')
197                     return False
198
199                 tfa_code = self._get_tfa_info('2-step verification code')
200
201                 if not tfa_code:
202                     warn(
203                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
204                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
205                     return False
206
207                 tfa_code = remove_start(tfa_code, 'G-')
208
209                 tfa_req = [
210                     user_hash, None, 2, None,
211                     [
212                         9, None, None, None, None, None, None, None,
213                         [None, tfa_code, True, 2]
214                     ]]
215
216                 tfa_results = req(
217                     self._TFA_URL.format(tl), tfa_req,
218                     'Submitting TFA code', 'Unable to submit TFA code')
219
220                 if tfa_results is False:
221                     return False
222
223                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
224                 if tfa_res:
225                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
226                     warn(
227                         'Unable to finish TFA: %s' % 'Invalid TFA code'
228                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
229                     return False
230
231                 check_cookie_url = try_get(
232                     tfa_results, lambda x: x[0][-1][2], compat_str)
233             else:
234                 CHALLENGES = {
235                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
236                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
237                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
238                 }
239                 challenge = CHALLENGES.get(
240                     challenge_str,
241                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
242                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
243                 return False
244         else:
245             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
246
247         if not check_cookie_url:
248             warn('Unable to extract CheckCookie URL')
249             return False
250
251         check_cookie_results = self._download_webpage(
252             check_cookie_url, None, 'Checking cookie', fatal=False)
253
254         if check_cookie_results is False:
255             return False
256
257         if 'https://myaccount.google.com/' not in check_cookie_results:
258             warn('Unable to log in')
259             return False
260
261         return True
262
263     def _download_webpage_handle(self, *args, **kwargs):
264         query = kwargs.get('query', {}).copy()
265         query['disable_polymer'] = 'true'
266         kwargs['query'] = query
267         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
268             *args, **compat_kwargs(kwargs))
269
270     def _real_initialize(self):
271         if self._downloader is None:
272             return
273         self._set_language()
274         if not self._login():
275             return
276
277
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield the entries of page and of every follow-up "Load more" page.

        Each chunk of HTML is handed to self._process_page (not defined in
        this class). The next chunk is located through the
        data-uix-load-more-href attribute of the current load-more widget.
        """
        chunk_html = widget_html = page
        page_num = 0
        while True:
            page_num += 1
            for item in self._process_page(chunk_html):
                yield item

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if load_more is None:
                return

            next_url = 'https://youtube.com/%s' % load_more.group('more')
            more = self._download_json(
                next_url, playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            chunk_html = more['content_html']
            if not chunk_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            widget_html = more['load_more_widget_html']
300
301
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    # self._VIDEO_RE is not defined in this class; it is read from the instance
    # and must provide the named groups 'id' and 'title'.

    def _process_page(self, content):
        """Yield a 'Youtube' url_result for every (id, title) pair found in content."""
        for video_id, video_title in self.extract_videos_from_page(content):
            yield self.url_result(video_id, 'Youtube', video_id, video_title)

    def extract_videos_from_page(self, page):
        """Return (video_id, title) pairs for the _VIDEO_RE matches in page.

        Ids keep the order of their first occurrence and are not repeated.
        When an id shows up again, its title is only used to fill in a title
        that was empty so far.
        """
        ids_in_page = []
        titles_in_page = []
        # video id -> position in the two lists above; avoids rescanning
        # ids_in_page with list.index() for every match
        position_of = {}
        for mobj in re.finditer(self._VIDEO_RE, page):
            # The link with index 0 is not the first video of the playlist (not sure if this still applies)
            # NOTE(review): this compares the 'id' group, not 'index', with '0' - confirm that is intended
            if 'index' in mobj.groupdict() and mobj.group('id') == '0':
                continue
            video_id = mobj.group('id')
            video_title = unescapeHTML(mobj.group('title'))
            if video_title:
                video_title = video_title.strip()
            if video_id in position_of:
                idx = position_of[video_id]
                if video_title and not titles_in_page[idx]:
                    titles_in_page[idx] = video_title
            else:
                position_of[video_id] = len(ids_in_page)
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
        return zip(ids_in_page, titles_in_page)
326
327
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result for each distinct playlist linked from content."""
        found_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        # orderedSet drops repeated ids
        for list_id in orderedSet(found_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % list_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download url and return a playlist result built from the entries of its pages."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        # The title is optional: a failed lookup is non-fatal
        title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, playlist_id)
        return self.playlist_result(entries, playlist_id, title)
341
342
343 class YoutubeIE(YoutubeBaseInfoExtractor):
344     IE_DESC = 'YouTube.com'
345     _VALID_URL = r"""(?x)^
346                      (
347                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
348                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
349                             (?:www\.)?deturl\.com/www\.youtube\.com/|
350                             (?:www\.)?pwnyoutube\.com/|
351                             (?:www\.)?hooktube\.com/|
352                             (?:www\.)?yourepeat\.com/|
353                             tube\.majestyc\.net/|
354                             (?:(?:www|dev)\.)?invidio\.us/|
355                             (?:www\.)?invidiou\.sh/|
356                             (?:www\.)?invidious\.snopyta\.org/|
357                             (?:www\.)?invidious\.kabi\.tk/|
358                             (?:www\.)?vid\.wxzm\.sx/|
359                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
360                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
361                          (?:                                                  # the various things that can precede the ID:
362                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
363                              |(?:                                             # or the v= param in all its forms
364                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
365                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
366                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
367                                  v=
368                              )
369                          ))
370                          |(?:
371                             youtu\.be|                                        # just youtu.be/xxxx
372                             vid\.plus|                                        # or vid.plus/xxxx
373                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
374                          )/
375                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
376                          )
377                      )?                                                       # all until now is optional -> you can pass the naked ID
378                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
379                      (?!.*?\blist=
380                         (?:
381                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
382                             WL                                                # WL are handled by the watch later IE
383                         )
384                      )
385                      (?(1).+)?                                                # if we found the ID, everything can follow
386                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
387     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
388     _formats = {
389         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
390         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
391         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
392         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
393         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
394         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
395         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
396         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
397         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
398         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
399         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
400         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
401         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
402         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
403         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
404         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
405         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
406         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
407
408
409         # 3D videos
410         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
411         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
412         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
413         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
414         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
415         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
416         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
417
418         # Apple HTTP Live Streaming
419         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
420         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
421         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
422         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
423         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
424         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
425         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
426         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
427
428         # DASH mp4 video
429         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
430         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
431         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
432         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
433         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
434         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
435         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
436         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
437         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
438         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
439         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
440         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
441
442         # Dash mp4 audio
443         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
444         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
445         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
446         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
447         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
448         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
449         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
450
451         # Dash webm
452         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
453         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
454         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
455         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
456         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
457         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
458         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
459         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
460         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
461         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
462         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
463         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
464         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
465         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
466         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
467         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
468         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
469         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
470         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
471         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
472         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
473         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
474
475         # Dash webm audio
476         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
477         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
478
479         # Dash webm audio with opus inside
480         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
481         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
482         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
483
484         # RTMP (unnamed)
485         '_rtmp': {'protocol': 'rtmp'},
486     }
487     _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
488
489     _GEO_BYPASS = False
490
491     IE_NAME = 'youtube'
492     _TESTS = [
493         {
494             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
495             'info_dict': {
496                 'id': 'BaW_jenozKc',
497                 'ext': 'mp4',
498                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
499                 'uploader': 'Philipp Hagemeister',
500                 'uploader_id': 'phihag',
501                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
502                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
503                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
504                 'upload_date': '20121002',
505                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
506                 'categories': ['Science & Technology'],
507                 'tags': ['youtube-dl'],
508                 'duration': 10,
509                 'view_count': int,
510                 'like_count': int,
511                 'dislike_count': int,
512                 'start_time': 1,
513                 'end_time': 9,
514             }
515         },
516         {
517             'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
518             'note': 'Test generic use_cipher_signature video (#897)',
519             'info_dict': {
520                 'id': 'UxxajLWwzqY',
521                 'ext': 'mp4',
522                 'upload_date': '20120506',
523                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
524                 'alt_title': 'I Love It (feat. Charli XCX)',
525                 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
526                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
527                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
528                          'iconic ep', 'iconic', 'love', 'it'],
529                 'duration': 180,
530                 'uploader': 'Icona Pop',
531                 'uploader_id': 'IconaPop',
532                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
533                 'creator': 'Icona Pop',
534                 'track': 'I Love It (feat. Charli XCX)',
535                 'artist': 'Icona Pop',
536             }
537         },
538         {
539             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
540             'note': 'Test VEVO video with age protection (#956)',
541             'info_dict': {
542                 'id': '07FYdnEawAQ',
543                 'ext': 'mp4',
544                 'upload_date': '20130703',
545                 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
546                 'alt_title': 'Tunnel Vision',
547                 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
548                 'duration': 419,
549                 'uploader': 'justintimberlakeVEVO',
550                 'uploader_id': 'justintimberlakeVEVO',
551                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
552                 'creator': 'Justin Timberlake',
553                 'track': 'Tunnel Vision',
554                 'artist': 'Justin Timberlake',
555                 'age_limit': 18,
556             }
557         },
558         {
559             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
560             'note': 'Embed-only video (#1746)',
561             'info_dict': {
562                 'id': 'yZIXLfi8CZQ',
563                 'ext': 'mp4',
564                 'upload_date': '20120608',
565                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
566                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
567                 'uploader': 'SET India',
568                 'uploader_id': 'setindia',
569                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
570                 'age_limit': 18,
571             }
572         },
573         {
574             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
575             'note': 'Use the first video ID in the URL',
576             'info_dict': {
577                 'id': 'BaW_jenozKc',
578                 'ext': 'mp4',
579                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
580                 'uploader': 'Philipp Hagemeister',
581                 'uploader_id': 'phihag',
582                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
583                 'upload_date': '20121002',
584                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
585                 'categories': ['Science & Technology'],
586                 'tags': ['youtube-dl'],
587                 'duration': 10,
588                 'view_count': int,
589                 'like_count': int,
590                 'dislike_count': int,
591             },
592             'params': {
593                 'skip_download': True,
594             },
595         },
596         {
597             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
598             'note': '256k DASH audio (format 141) via DASH manifest',
599             'info_dict': {
600                 'id': 'a9LDPn-MO4I',
601                 'ext': 'm4a',
602                 'upload_date': '20121002',
603                 'uploader_id': '8KVIDEO',
604                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
605                 'description': '',
606                 'uploader': '8KVIDEO',
607                 'title': 'UHDTV TEST 8K VIDEO.mp4'
608             },
609             'params': {
610                 'youtube_include_dash_manifest': True,
611                 'format': '141',
612             },
613             'skip': 'format 141 not served anymore',
614         },
615         # DASH manifest with encrypted signature
616         {
617             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
618             'info_dict': {
619                 'id': 'IB3lcPjvWLA',
620                 'ext': 'm4a',
621                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
622                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
623                 'duration': 244,
624                 'uploader': 'AfrojackVEVO',
625                 'uploader_id': 'AfrojackVEVO',
626                 'upload_date': '20131011',
627             },
628             'params': {
629                 'youtube_include_dash_manifest': True,
630                 'format': '141/bestaudio[ext=m4a]',
631             },
632         },
633         # JS player signature function name containing $
634         {
635             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
636             'info_dict': {
637                 'id': 'nfWlot6h_JM',
638                 'ext': 'm4a',
639                 'title': 'Taylor Swift - Shake It Off',
640                 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
641                 'duration': 242,
642                 'uploader': 'TaylorSwiftVEVO',
643                 'uploader_id': 'TaylorSwiftVEVO',
644                 'upload_date': '20140818',
645                 'creator': 'Taylor Swift',
646             },
647             'params': {
648                 'youtube_include_dash_manifest': True,
649                 'format': '141/bestaudio[ext=m4a]',
650             },
651         },
652         # Controversy video
653         {
654             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
655             'info_dict': {
656                 'id': 'T4XJQO3qol8',
657                 'ext': 'mp4',
658                 'duration': 219,
659                 'upload_date': '20100909',
660                 'uploader': 'Amazing Atheist',
661                 'uploader_id': 'TheAmazingAtheist',
662                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
663                 'title': 'Burning Everyone\'s Koran',
664                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
665             }
666         },
667         # Normal age-gate video (No vevo, embed allowed)
668         {
669             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
670             'info_dict': {
671                 'id': 'HtVdAasjOgU',
672                 'ext': 'mp4',
673                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
674                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
675                 'duration': 142,
676                 'uploader': 'The Witcher',
677                 'uploader_id': 'WitcherGame',
678                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
679                 'upload_date': '20140605',
680                 'age_limit': 18,
681             },
682         },
683         # Age-gate video with encrypted signature
684         {
685             'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
686             'info_dict': {
687                 'id': '6kLq3WMV1nU',
688                 'ext': 'mp4',
689                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
690                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
691                 'duration': 246,
692                 'uploader': 'LloydVEVO',
693                 'uploader_id': 'LloydVEVO',
694                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
695                 'upload_date': '20110629',
696                 'age_limit': 18,
697             },
698         },
699         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
700         # YouTube Red ad is not captured for creator
701         {
702             'url': '__2ABJjxzNo',
703             'info_dict': {
704                 'id': '__2ABJjxzNo',
705                 'ext': 'mp4',
706                 'duration': 266,
707                 'upload_date': '20100430',
708                 'uploader_id': 'deadmau5',
709                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
710                 'creator': 'deadmau5',
711                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
712                 'uploader': 'deadmau5',
713                 'title': 'Deadmau5 - Some Chords (HD)',
714                 'alt_title': 'Some Chords',
715             },
716             'expected_warnings': [
717                 'DASH manifest missing',
718             ]
719         },
720         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
721         {
722             'url': 'lqQg6PlCWgI',
723             'info_dict': {
724                 'id': 'lqQg6PlCWgI',
725                 'ext': 'mp4',
726                 'duration': 6085,
727                 'upload_date': '20150827',
728                 'uploader_id': 'olympic',
729                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
730                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
731                 'uploader': 'Olympic',
732                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
733             },
734             'params': {
735                 'skip_download': 'requires avconv',
736             }
737         },
738         # Non-square pixels
739         {
740             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
741             'info_dict': {
742                 'id': '_b-2C3KPAM0',
743                 'ext': 'mp4',
744                 'stretched_ratio': 16 / 9.,
745                 'duration': 85,
746                 'upload_date': '20110310',
747                 'uploader_id': 'AllenMeow',
748                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
749                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
750                 'uploader': '孫ᄋᄅ',
751                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
752             },
753         },
754         # url_encoded_fmt_stream_map is empty string
755         {
756             'url': 'qEJwOuvDf7I',
757             'info_dict': {
758                 'id': 'qEJwOuvDf7I',
759                 'ext': 'webm',
760                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
761                 'description': '',
762                 'upload_date': '20150404',
763                 'uploader_id': 'spbelect',
764                 'uploader': 'Наблюдатели Петербурга',
765             },
766             'params': {
767                 'skip_download': 'requires avconv',
768             },
769             'skip': 'This live event has ended.',
770         },
771         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
772         {
773             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
774             'info_dict': {
775                 'id': 'FIl7x6_3R5Y',
776                 'ext': 'webm',
777                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
778                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
779                 'duration': 220,
780                 'upload_date': '20150625',
781                 'uploader_id': 'dorappi2000',
782                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
783                 'uploader': 'dorappi2000',
784                 'formats': 'mincount:31',
785             },
786             'skip': 'not actual anymore',
787         },
788         # DASH manifest with segment_list
789         {
790             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
791             'md5': '8ce563a1d667b599d21064e982ab9e31',
792             'info_dict': {
793                 'id': 'CsmdDsKjzN8',
794                 'ext': 'mp4',
795                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
796                 'uploader': 'Airtek',
797                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
798                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
799                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
800             },
801             'params': {
802                 'youtube_include_dash_manifest': True,
803                 'format': '135',  # bestvideo
804             },
805             'skip': 'This live event has ended.',
806         },
807         {
808             # Multifeed videos (multiple cameras), URL is for Main Camera
809             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
810             'info_dict': {
811                 'id': 'jqWvoWXjCVs',
812                 'title': 'teamPGP: Rocket League Noob Stream',
813                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
814             },
815             'playlist': [{
816                 'info_dict': {
817                     'id': 'jqWvoWXjCVs',
818                     'ext': 'mp4',
819                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
820                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
821                     'duration': 7335,
822                     'upload_date': '20150721',
823                     'uploader': 'Beer Games Beer',
824                     'uploader_id': 'beergamesbeer',
825                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
826                     'license': 'Standard YouTube License',
827                 },
828             }, {
829                 'info_dict': {
830                     'id': '6h8e8xoXJzg',
831                     'ext': 'mp4',
832                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
833                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
834                     'duration': 7337,
835                     'upload_date': '20150721',
836                     'uploader': 'Beer Games Beer',
837                     'uploader_id': 'beergamesbeer',
838                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
839                     'license': 'Standard YouTube License',
840                 },
841             }, {
842                 'info_dict': {
843                     'id': 'PUOgX5z9xZw',
844                     'ext': 'mp4',
845                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
846                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
847                     'duration': 7337,
848                     'upload_date': '20150721',
849                     'uploader': 'Beer Games Beer',
850                     'uploader_id': 'beergamesbeer',
851                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
852                     'license': 'Standard YouTube License',
853                 },
854             }, {
855                 'info_dict': {
856                     'id': 'teuwxikvS5k',
857                     'ext': 'mp4',
858                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
859                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
860                     'duration': 7334,
861                     'upload_date': '20150721',
862                     'uploader': 'Beer Games Beer',
863                     'uploader_id': 'beergamesbeer',
864                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
865                     'license': 'Standard YouTube License',
866                 },
867             }],
868             'params': {
869                 'skip_download': True,
870             },
871             'skip': 'This video is not available.',
872         },
873         {
874             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
875             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
876             'info_dict': {
877                 'id': 'gVfLd0zydlo',
878                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
879             },
880             'playlist_count': 2,
881             'skip': 'Not multifeed anymore',
882         },
883         {
884             'url': 'https://vid.plus/FlRa-iH7PGw',
885             'only_matching': True,
886         },
887         {
888             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
889             'only_matching': True,
890         },
891         {
892             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
893             # Also tests cut-off URL expansion in video description (see
894             # https://github.com/ytdl-org/youtube-dl/issues/1892,
895             # https://github.com/ytdl-org/youtube-dl/issues/8164)
896             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
897             'info_dict': {
898                 'id': 'lsguqyKfVQg',
899                 'ext': 'mp4',
900                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
901                 'alt_title': 'Dark Walk - Position Music',
902                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
903                 'duration': 133,
904                 'upload_date': '20151119',
905                 'uploader_id': 'IronSoulElf',
906                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
907                 'uploader': 'IronSoulElf',
908                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
909                 'track': 'Dark Walk - Position Music',
910                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
911                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
912             },
913             'params': {
914                 'skip_download': True,
915             },
916         },
917         {
918             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
919             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
920             'only_matching': True,
921         },
922         {
923             # Video with yt:stretch=17:0
924             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
925             'info_dict': {
926                 'id': 'Q39EVAstoRM',
927                 'ext': 'mp4',
928                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
929                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
930                 'upload_date': '20151107',
931                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
932                 'uploader': 'CH GAMER DROID',
933             },
934             'params': {
935                 'skip_download': True,
936             },
937             'skip': 'This video does not exist.',
938         },
939         {
940             # Video licensed under Creative Commons
941             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
942             'info_dict': {
943                 'id': 'M4gD1WSo5mA',
944                 'ext': 'mp4',
945                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
946                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
947                 'duration': 721,
948                 'upload_date': '20150127',
949                 'uploader_id': 'BerkmanCenter',
950                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
951                 'uploader': 'The Berkman Klein Center for Internet & Society',
952                 'license': 'Creative Commons Attribution license (reuse allowed)',
953             },
954             'params': {
955                 'skip_download': True,
956             },
957         },
958         {
959             # Channel-like uploader_url
960             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
961             'info_dict': {
962                 'id': 'eQcmzGIKrzg',
963                 'ext': 'mp4',
964                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
965                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
966                 'duration': 4060,
967                 'upload_date': '20151119',
968                 'uploader': 'Bernie Sanders',
969                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
970                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
971                 'license': 'Creative Commons Attribution license (reuse allowed)',
972             },
973             'params': {
974                 'skip_download': True,
975             },
976         },
977         {
978             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
979             'only_matching': True,
980         },
981         {
982             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
983             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
984             'only_matching': True,
985         },
986         {
987             # Rental video preview
988             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
989             'info_dict': {
990                 'id': 'uGpuVWrhIzE',
991                 'ext': 'mp4',
992                 'title': 'Piku - Trailer',
993                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
994                 'upload_date': '20150811',
995                 'uploader': 'FlixMatrix',
996                 'uploader_id': 'FlixMatrixKaravan',
997                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
998                 'license': 'Standard YouTube License',
999             },
1000             'params': {
1001                 'skip_download': True,
1002             },
1003             'skip': 'This video is not available.',
1004         },
1005         {
1006             # YouTube Red video with episode data
1007             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1008             'info_dict': {
1009                 'id': 'iqKdEhx-dD4',
1010                 'ext': 'mp4',
1011                 'title': 'Isolation - Mind Field (Ep 1)',
1012                 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1013                 'duration': 2085,
1014                 'upload_date': '20170118',
1015                 'uploader': 'Vsauce',
1016                 'uploader_id': 'Vsauce',
1017                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1018                 'series': 'Mind Field',
1019                 'season_number': 1,
1020                 'episode_number': 1,
1021             },
1022             'params': {
1023                 'skip_download': True,
1024             },
1025             'expected_warnings': [
1026                 'Skipping DASH manifest',
1027             ],
1028         },
1029         {
1030             # The following content has been identified by the YouTube community
1031             # as inappropriate or offensive to some audiences.
1032             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1033             'info_dict': {
1034                 'id': '6SJNVb0GnPI',
1035                 'ext': 'mp4',
1036                 'title': 'Race Differences in Intelligence',
1037                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1038                 'duration': 965,
1039                 'upload_date': '20140124',
1040                 'uploader': 'New Century Foundation',
1041                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1042                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1043             },
1044             'params': {
1045                 'skip_download': True,
1046             },
1047         },
1048         {
1049             # itag 212
1050             'url': '1t24XAntNCY',
1051             'only_matching': True,
1052         },
1053         {
1054             # geo restricted to JP
1055             'url': 'sJL6WA-aGkQ',
1056             'only_matching': True,
1057         },
1058         {
1059             'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1060             'only_matching': True,
1061         },
1062         {
1063             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1064             'only_matching': True,
1065         },
1066         {
1067             # DRM protected
1068             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1069             'only_matching': True,
1070         },
1071         {
1072             # Video with unsupported adaptive stream type formats
1073             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1074             'info_dict': {
1075                 'id': 'Z4Vy8R84T1U',
1076                 'ext': 'mp4',
1077                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1078                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1079                 'duration': 433,
1080                 'upload_date': '20130923',
1081                 'uploader': 'Amelia Putri Harwita',
1082                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1083                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1084                 'formats': 'maxcount:10',
1085             },
1086             'params': {
1087                 'skip_download': True,
1088                 'youtube_include_dash_manifest': False,
1089             },
1090         },
1091         {
1092             # Youtube Music Auto-generated description
1093             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1094             'info_dict': {
1095                 'id': 'MgNrAu2pzNs',
1096                 'ext': 'mp4',
1097                 'title': 'Voyeur Girl',
1098                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1099                 'upload_date': '20190312',
1100                 'uploader': 'Various Artists - Topic',
1101                 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
1102                 'artist': 'Stephen',
1103                 'track': 'Voyeur Girl',
1104                 'album': 'it\'s too much love to know my dear',
1105                 'release_date': '20190313',
1106                 'release_year': 2019,
1107             },
1108             'params': {
1109                 'skip_download': True,
1110             },
1111         },
1112         {
1113             # Youtube Music Auto-generated description
1114             # Retrieve 'artist' field from 'Artist:' in video description
1115             # when it is present on youtube music video
1116             'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1117             'info_dict': {
1118                 'id': 'k0jLE7tTwjY',
1119                 'ext': 'mp4',
1120                 'title': 'Latch Feat. Sam Smith',
1121                 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1122                 'upload_date': '20150110',
1123                 'uploader': 'Various Artists - Topic',
1124                 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1125                 'artist': 'Disclosure',
1126                 'track': 'Latch Feat. Sam Smith',
1127                 'album': 'Latch Featuring Sam Smith',
1128                 'release_date': '20121008',
1129                 'release_year': 2012,
1130             },
1131             'params': {
1132                 'skip_download': True,
1133             },
1134         },
1135         {
1136             # Youtube Music Auto-generated description
1137             # handle multiple artists on youtube music video
1138             'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1139             'info_dict': {
1140                 'id': '74qn0eJSjpA',
1141                 'ext': 'mp4',
1142                 'title': 'Eastside',
1143                 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1144                 'upload_date': '20180710',
1145                 'uploader': 'Benny Blanco - Topic',
1146                 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1147                 'artist': 'benny blanco, Halsey, Khalid',
1148                 'track': 'Eastside',
1149                 'album': 'Eastside',
1150                 'release_date': '20180713',
1151                 'release_year': 2018,
1152             },
1153             'params': {
1154                 'skip_download': True,
1155             },
1156         },
1157         {
1158             # Youtube Music Auto-generated description
1159             # handle youtube music video with release_year and no release_date
1160             'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1161             'info_dict': {
1162                 'id': '-hcAI0g-f5M',
1163                 'ext': 'mp4',
1164                 'title': 'Put It On Me',
1165                 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
1166                 'upload_date': '20180426',
1167                 'uploader': 'Matt Maeson - Topic',
1168                 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1169                 'artist': 'Matt Maeson',
1170                 'track': 'Put It On Me',
1171                 'album': 'The Hearse',
1172                 'release_date': None,
1173                 'release_year': 2018,
1174             },
1175             'params': {
1176                 'skip_download': True,
1177             },
1178         },
1179     ]
1180
1181     def __init__(self, *args, **kwargs):
1182         super(YoutubeIE, self).__init__(*args, **kwargs)
1183         self._player_cache = {}
1184
1185     def report_video_info_webpage_download(self, video_id):
1186         """Report attempt to download video info webpage."""
1187         self.to_screen('%s: Downloading video info webpage' % video_id)
1188
1189     def report_information_extraction(self, video_id):
1190         """Report attempt to extract video information."""
1191         self.to_screen('%s: Extracting video information' % video_id)
1192
1193     def report_unavailable_format(self, video_id, format):
1194         """Report extracted video URL."""
1195         self.to_screen('%s: Format %s not available' % (video_id, format))
1196
1197     def report_rtmp_download(self):
1198         """Indicate the download will use the RTMP protocol."""
1199         self.to_screen('RTMP download detected')
1200
1201     def _signature_cache_id(self, example_sig):
1202         """ Return a string representation of a signature """
1203         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1204
1205     def _extract_signature_function(self, video_id, player_url, example_sig):
1206         id_m = re.match(
1207             r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
1208             player_url)
1209         if not id_m:
1210             raise ExtractorError('Cannot identify player %r' % player_url)
1211         player_type = id_m.group('ext')
1212         player_id = id_m.group('id')
1213
1214         # Read from filesystem cache
1215         func_id = '%s_%s_%s' % (
1216             player_type, player_id, self._signature_cache_id(example_sig))
1217         assert os.path.basename(func_id) == func_id
1218
1219         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1220         if cache_spec is not None:
1221             return lambda s: ''.join(s[i] for i in cache_spec)
1222
1223         download_note = (
1224             'Downloading player %s' % player_url
1225             if self._downloader.params.get('verbose') else
1226             'Downloading %s player %s' % (player_type, player_id)
1227         )
1228         if player_type == 'js':
1229             code = self._download_webpage(
1230                 player_url, video_id,
1231                 note=download_note,
1232                 errnote='Download of %s failed' % player_url)
1233             res = self._parse_sig_js(code)
1234         elif player_type == 'swf':
1235             urlh = self._request_webpage(
1236                 player_url, video_id,
1237                 note=download_note,
1238                 errnote='Download of %s failed' % player_url)
1239             code = urlh.read()
1240             res = self._parse_sig_swf(code)
1241         else:
1242             assert False, 'Invalid player type %r' % player_type
1243
1244         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1245         cache_res = res(test_string)
1246         cache_spec = [ord(c) for c in cache_res]
1247
1248         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1249         return res
1250
1251     def _print_sig_code(self, func, example_sig):
1252         def gen_sig_code(idxs):
1253             def _genslice(start, end, step):
1254                 starts = '' if start == 0 else str(start)
1255                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1256                 steps = '' if step == 1 else (':%d' % step)
1257                 return 's[%s%s%s]' % (starts, ends, steps)
1258
1259             step = None
1260             # Quelch pyflakes warnings - start will be set when step is set
1261             start = '(Never used)'
1262             for i, prev in zip(idxs[1:], idxs[:-1]):
1263                 if step is not None:
1264                     if i - prev == step:
1265                         continue
1266                     yield _genslice(start, prev, step)
1267                     step = None
1268                     continue
1269                 if i - prev in [-1, 1]:
1270                     step = i - prev
1271                     start = prev
1272                     continue
1273                 else:
1274                     yield 's[%d]' % prev
1275             if step is None:
1276                 yield 's[%d]' % i
1277             else:
1278                 yield _genslice(start, i, step)
1279
1280         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1281         cache_res = func(test_string)
1282         cache_spec = [ord(c) for c in cache_res]
1283         expr_code = ' + '.join(gen_sig_code(cache_spec))
1284         signature_id_tuple = '(%s)' % (
1285             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1286         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1287                 '    return %s\n') % (signature_id_tuple, expr_code)
1288         self.to_screen('Extracted signature function:\n' + code)
1289
1290     def _parse_sig_js(self, jscode):
1291         funcname = self._search_regex(
1292             (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1293              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1294              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?(?P<sig>[a-zA-Z0-9$]+)\(',
1295              r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1296              r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1297             jscode, 'Initial JS player signature function name', group='sig')
1298
1299         jsi = JSInterpreter(jscode)
1300         initial_function = jsi.extract_function(funcname)
1301         return lambda s: initial_function([s])
1302
1303     def _parse_sig_swf(self, file_contents):
1304         swfi = SWFInterpreter(file_contents)
1305         TARGET_CLASSNAME = 'SignatureDecipher'
1306         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1307         initial_function = swfi.extract_function(searched_class, 'decipher')
1308         return lambda s: initial_function([s])
1309
1310     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1311         """Turn the encrypted s field into a working signature"""
1312
1313         if player_url is None:
1314             raise ExtractorError('Cannot decrypt signature without player_url')
1315
1316         if player_url.startswith('//'):
1317             player_url = 'https:' + player_url
1318         elif not re.match(r'https?://', player_url):
1319             player_url = compat_urlparse.urljoin(
1320                 'https://www.youtube.com', player_url)
1321         try:
1322             player_id = (player_url, self._signature_cache_id(s))
1323             if player_id not in self._player_cache:
1324                 func = self._extract_signature_function(
1325                     video_id, player_url, s
1326                 )
1327                 self._player_cache[player_id] = func
1328             func = self._player_cache[player_id]
1329             if self._downloader.params.get('youtube_print_sig_code'):
1330                 self._print_sig_code(func, s)
1331             return func(s)
1332         except Exception as e:
1333             tb = traceback.format_exc()
1334             raise ExtractorError(
1335                 'Signature extraction failed: ' + tb, cause=e)
1336
1337     def _get_subtitles(self, video_id, webpage):
1338         try:
1339             subs_doc = self._download_xml(
1340                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1341                 video_id, note=False)
1342         except ExtractorError as err:
1343             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1344             return {}
1345
1346         sub_lang_list = {}
1347         for track in subs_doc.findall('track'):
1348             lang = track.attrib['lang_code']
1349             if lang in sub_lang_list:
1350                 continue
1351             sub_formats = []
1352             for ext in self._SUBTITLE_FORMATS:
1353                 params = compat_urllib_parse_urlencode({
1354                     'lang': lang,
1355                     'v': video_id,
1356                     'fmt': ext,
1357                     'name': track.attrib['name'].encode('utf-8'),
1358                 })
1359                 sub_formats.append({
1360                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1361                     'ext': ext,
1362                 })
1363             sub_lang_list[lang] = sub_formats
1364         if not sub_lang_list:
1365             self._downloader.report_warning('video doesn\'t have subtitles')
1366             return {}
1367         return sub_lang_list
1368
1369     def _get_ytplayer_config(self, video_id, webpage):
1370         patterns = (
1371             # User data may contain arbitrary character sequences that may affect
1372             # JSON extraction with regex, e.g. when '};' is contained the second
1373             # regex won't capture the whole JSON. Yet working around by trying more
1374             # concrete regex first keeping in mind proper quoted string handling
1375             # to be implemented in future that will replace this workaround (see
1376             # https://github.com/ytdl-org/youtube-dl/issues/7468,
1377             # https://github.com/ytdl-org/youtube-dl/pull/7599)
1378             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1379             r';ytplayer\.config\s*=\s*({.+?});',
1380         )
1381         config = self._search_regex(
1382             patterns, webpage, 'ytplayer.config', default=None)
1383         if config:
1384             return self._parse_json(
1385                 uppercase_escape(config), video_id, fatal=False)
1386
1387     def _get_automatic_captions(self, video_id, webpage):
1388         """We need the webpage for getting the captions url, pass it as an
1389            argument to speed up the process."""
1390         self.to_screen('%s: Looking for automatic captions' % video_id)
1391         player_config = self._get_ytplayer_config(video_id, webpage)
1392         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1393         if not player_config:
1394             self._downloader.report_warning(err_msg)
1395             return {}
1396         try:
1397             args = player_config['args']
1398             caption_url = args.get('ttsurl')
1399             if caption_url:
1400                 timestamp = args['timestamp']
1401                 # We get the available subtitles
1402                 list_params = compat_urllib_parse_urlencode({
1403                     'type': 'list',
1404                     'tlangs': 1,
1405                     'asrs': 1,
1406                 })
1407                 list_url = caption_url + '&' + list_params
1408                 caption_list = self._download_xml(list_url, video_id)
1409                 original_lang_node = caption_list.find('track')
1410                 if original_lang_node is None:
1411                     self._downloader.report_warning('Video doesn\'t have automatic captions')
1412                     return {}
1413                 original_lang = original_lang_node.attrib['lang_code']
1414                 caption_kind = original_lang_node.attrib.get('kind', '')
1415
1416                 sub_lang_list = {}
1417                 for lang_node in caption_list.findall('target'):
1418                     sub_lang = lang_node.attrib['lang_code']
1419                     sub_formats = []
1420                     for ext in self._SUBTITLE_FORMATS:
1421                         params = compat_urllib_parse_urlencode({
1422                             'lang': original_lang,
1423                             'tlang': sub_lang,
1424                             'fmt': ext,
1425                             'ts': timestamp,
1426                             'kind': caption_kind,
1427                         })
1428                         sub_formats.append({
1429                             'url': caption_url + '&' + params,
1430                             'ext': ext,
1431                         })
1432                     sub_lang_list[sub_lang] = sub_formats
1433                 return sub_lang_list
1434
1435             def make_captions(sub_url, sub_langs):
1436                 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1437                 caption_qs = compat_parse_qs(parsed_sub_url.query)
1438                 captions = {}
1439                 for sub_lang in sub_langs:
1440                     sub_formats = []
1441                     for ext in self._SUBTITLE_FORMATS:
1442                         caption_qs.update({
1443                             'tlang': [sub_lang],
1444                             'fmt': [ext],
1445                         })
1446                         sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1447                             query=compat_urllib_parse_urlencode(caption_qs, True)))
1448                         sub_formats.append({
1449                             'url': sub_url,
1450                             'ext': ext,
1451                         })
1452                     captions[sub_lang] = sub_formats
1453                 return captions
1454
1455             # New captions format as of 22.06.2017
1456             player_response = args.get('player_response')
1457             if player_response and isinstance(player_response, compat_str):
1458                 player_response = self._parse_json(
1459                     player_response, video_id, fatal=False)
1460                 if player_response:
1461                     renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1462                     base_url = renderer['captionTracks'][0]['baseUrl']
1463                     sub_lang_list = []
1464                     for lang in renderer['translationLanguages']:
1465                         lang_code = lang.get('languageCode')
1466                         if lang_code:
1467                             sub_lang_list.append(lang_code)
1468                     return make_captions(base_url, sub_lang_list)
1469
1470             # Some videos don't provide ttsurl but rather caption_tracks and
1471             # caption_translation_languages (e.g. 20LmZk1hakA)
1472             # Does not used anymore as of 22.06.2017
1473             caption_tracks = args['caption_tracks']
1474             caption_translation_languages = args['caption_translation_languages']
1475             caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1476             sub_lang_list = []
1477             for lang in caption_translation_languages.split(','):
1478                 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1479                 sub_lang = lang_qs.get('lc', [None])[0]
1480                 if sub_lang:
1481                     sub_lang_list.append(sub_lang)
1482             return make_captions(caption_url, sub_lang_list)
1483         # An extractor error can be raise by the download process if there are
1484         # no automatic captions but there are subtitles
1485         except (KeyError, IndexError, ExtractorError):
1486             self._downloader.report_warning(err_msg)
1487             return {}
1488
1489     def _mark_watched(self, video_id, video_info, player_response):
1490         playback_url = url_or_none(try_get(
1491             player_response,
1492             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1493             video_info, lambda x: x['videostats_playback_base_url'][0]))
1494         if not playback_url:
1495             return
1496         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1497         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1498
1499         # cpn generation algorithm is reverse engineered from base.js.
1500         # In fact it works even with dummy cpn.
1501         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1502         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1503
1504         qs.update({
1505             'ver': ['2'],
1506             'cpn': [cpn],
1507         })
1508         playback_url = compat_urlparse.urlunparse(
1509             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1510
1511         self._download_webpage(
1512             playback_url, video_id, 'Marking watched',
1513             'Unable to mark watched', fatal=False)
1514
1515     @staticmethod
1516     def _extract_urls(webpage):
1517         # Embedded YouTube player
1518         entries = [
1519             unescapeHTML(mobj.group('url'))
1520             for mobj in re.finditer(r'''(?x)
1521             (?:
1522                 <iframe[^>]+?src=|
1523                 data-video-url=|
1524                 <embed[^>]+?src=|
1525                 embedSWF\(?:\s*|
1526                 <object[^>]+data=|
1527                 new\s+SWFObject\(
1528             )
1529             (["\'])
1530                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1531                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1532             \1''', webpage)]
1533
1534         # lazyYT YouTube embed
1535         entries.extend(list(map(
1536             unescapeHTML,
1537             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1538
1539         # Wordpress "YouTube Video Importer" plugin
1540         matches = re.findall(r'''(?x)<div[^>]+
1541             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1542             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1543         entries.extend(m[-1] for m in matches)
1544
1545         return entries
1546
1547     @staticmethod
1548     def _extract_url(webpage):
1549         urls = YoutubeIE._extract_urls(webpage)
1550         return urls[0] if urls else None
1551
1552     @classmethod
1553     def extract_id(cls, url):
1554         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1555         if mobj is None:
1556             raise ExtractorError('Invalid URL: %s' % url)
1557         video_id = mobj.group(2)
1558         return video_id
1559
1560     def _extract_annotations(self, video_id):
1561         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1562         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1563
1564     @staticmethod
1565     def _extract_chapters(description, duration):
1566         if not description:
1567             return None
1568         chapter_lines = re.findall(
1569             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1570             description)
1571         if not chapter_lines:
1572             return None
1573         chapters = []
1574         for next_num, (chapter_line, time_point) in enumerate(
1575                 chapter_lines, start=1):
1576             start_time = parse_duration(time_point)
1577             if start_time is None:
1578                 continue
1579             if start_time > duration:
1580                 break
1581             end_time = (duration if next_num == len(chapter_lines)
1582                         else parse_duration(chapter_lines[next_num][1]))
1583             if end_time is None:
1584                 continue
1585             if end_time > duration:
1586                 end_time = duration
1587             if start_time > end_time:
1588                 break
1589             chapter_title = re.sub(
1590                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1591             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1592             chapters.append({
1593                 'start_time': start_time,
1594                 'end_time': end_time,
1595                 'title': chapter_title,
1596             })
1597         return chapters
1598
1599     def _real_extract(self, url):
1600         url, smuggled_data = unsmuggle_url(url, {})
1601
1602         proto = (
1603             'http' if self._downloader.params.get('prefer_insecure', False)
1604             else 'https')
1605
1606         start_time = None
1607         end_time = None
1608         parsed_url = compat_urllib_parse_urlparse(url)
1609         for component in [parsed_url.fragment, parsed_url.query]:
1610             query = compat_parse_qs(component)
1611             if start_time is None and 't' in query:
1612                 start_time = parse_duration(query['t'][0])
1613             if start_time is None and 'start' in query:
1614                 start_time = parse_duration(query['start'][0])
1615             if end_time is None and 'end' in query:
1616                 end_time = parse_duration(query['end'][0])
1617
1618         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1619         mobj = re.search(self._NEXT_URL_RE, url)
1620         if mobj:
1621             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1622         video_id = self.extract_id(url)
1623
1624         # Get video webpage
1625         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1626         video_webpage = self._download_webpage(url, video_id)
1627
1628         # Attempt to extract SWF player URL
1629         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1630         if mobj is not None:
1631             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1632         else:
1633             player_url = None
1634
1635         dash_mpds = []
1636
1637         def add_dash_mpd(video_info):
1638             dash_mpd = video_info.get('dashmpd')
1639             if dash_mpd and dash_mpd[0] not in dash_mpds:
1640                 dash_mpds.append(dash_mpd[0])
1641
1642         def add_dash_mpd_pr(pl_response):
1643             dash_mpd = url_or_none(try_get(
1644                 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1645                 compat_str))
1646             if dash_mpd and dash_mpd not in dash_mpds:
1647                 dash_mpds.append(dash_mpd)
1648
1649         is_live = None
1650         view_count = None
1651
1652         def extract_view_count(v_info):
1653             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1654
1655         player_response = {}
1656
1657         # Get video info
1658         embed_webpage = None
1659         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1660             age_gate = True
1661             # We simulate the access to the video from www.youtube.com/v/{video_id}
1662             # this can be viewed without login into Youtube
1663             url = proto + '://www.youtube.com/embed/%s' % video_id
1664             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1665             data = compat_urllib_parse_urlencode({
1666                 'video_id': video_id,
1667                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1668                 'sts': self._search_regex(
1669                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1670             })
1671             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1672             video_info_webpage = self._download_webpage(
1673                 video_info_url, video_id,
1674                 note='Refetching age-gated info webpage',
1675                 errnote='unable to download video info webpage')
1676             video_info = compat_parse_qs(video_info_webpage)
1677             add_dash_mpd(video_info)
1678         else:
1679             age_gate = False
1680             video_info = None
1681             sts = None
1682             # Try looking directly into the video webpage
1683             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1684             if ytplayer_config:
1685                 args = ytplayer_config['args']
1686                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1687                     # Convert to the same format returned by compat_parse_qs
1688                     video_info = dict((k, [v]) for k, v in args.items())
1689                     add_dash_mpd(video_info)
1690                 # Rental video is not rented but preview is available (e.g.
1691                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1692                 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1693                 if not video_info and args.get('ypc_vid'):
1694                     return self.url_result(
1695                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1696                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1697                     is_live = True
1698                 sts = ytplayer_config.get('sts')
1699                 if not player_response:
1700                     pl_response = str_or_none(args.get('player_response'))
1701                     if pl_response:
1702                         pl_response = self._parse_json(pl_response, video_id, fatal=False)
1703                         if isinstance(pl_response, dict):
1704                             player_response = pl_response
1705             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1706                 add_dash_mpd_pr(player_response)
1707                 # We also try looking in get_video_info since it may contain different dashmpd
1708                 # URL that points to a DASH manifest with possibly different itag set (some itags
1709                 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1710                 # manifest pointed by get_video_info's dashmpd).
1711                 # The general idea is to take a union of itags of both DASH manifests (for example
1712                 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
1713                 self.report_video_info_webpage_download(video_id)
1714                 for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
1715                     query = {
1716                         'video_id': video_id,
1717                         'ps': 'default',
1718                         'eurl': '',
1719                         'gl': 'US',
1720                         'hl': 'en',
1721                     }
1722                     if el:
1723                         query['el'] = el
1724                     if sts:
1725                         query['sts'] = sts
1726                     video_info_webpage = self._download_webpage(
1727                         '%s://www.youtube.com/get_video_info' % proto,
1728                         video_id, note=False,
1729                         errnote='unable to download video info webpage',
1730                         fatal=False, query=query)
1731                     if not video_info_webpage:
1732                         continue
1733                     get_video_info = compat_parse_qs(video_info_webpage)
1734                     if not player_response:
1735                         pl_response = get_video_info.get('player_response', [None])[0]
1736                         if isinstance(pl_response, dict):
1737                             player_response = pl_response
1738                             add_dash_mpd_pr(player_response)
1739                     add_dash_mpd(get_video_info)
1740                     if view_count is None:
1741                         view_count = extract_view_count(get_video_info)
1742                     if not video_info:
1743                         video_info = get_video_info
1744                     get_token = get_video_info.get('token') or get_video_info.get('account_playback_token')
1745                     if get_token:
1746                         # Different get_video_info requests may report different results, e.g.
1747                         # some may report video unavailability, but some may serve it without
1748                         # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
1749                         # the original webpage as well as el=info and el=embedded get_video_info
1750                         # requests report video unavailability due to geo restriction while
1751                         # el=detailpage succeeds and returns valid data). This is probably
1752                         # due to YouTube measures against IP ranges of hosting providers.
1753                         # Working around by preferring the first succeeded video_info containing
1754                         # the token if no such video_info yet was found.
1755                         token = video_info.get('token') or video_info.get('account_playback_token')
1756                         if not token:
1757                             video_info = get_video_info
1758                         break
1759
1760         def extract_unavailable_message():
1761             return self._html_search_regex(
1762                 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1763                 video_webpage, 'unavailable message', default=None)
1764
1765         if not video_info:
1766             unavailable_message = extract_unavailable_message()
1767             if not unavailable_message:
1768                 unavailable_message = 'Unable to extract video data'
1769             raise ExtractorError(
1770                 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1771
1772         token = video_info.get('token') or video_info.get('account_playback_token')
1773         if not token:
1774             if 'reason' in video_info:
1775                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1776                     regions_allowed = self._html_search_meta(
1777                         'regionsAllowed', video_webpage, default=None)
1778                     countries = regions_allowed.split(',') if regions_allowed else None
1779                     self.raise_geo_restricted(
1780                         msg=video_info['reason'][0], countries=countries)
1781                 reason = video_info['reason'][0]
1782                 if 'Invalid parameters' in reason:
1783                     unavailable_message = extract_unavailable_message()
1784                     if unavailable_message:
1785                         reason = unavailable_message
1786                 raise ExtractorError(
1787                     'YouTube said: %s' % reason,
1788                     expected=True, video_id=video_id)
1789             else:
1790                 raise ExtractorError(
1791                     '"token" parameter not in video info for unknown reason',
1792                     video_id=video_id)
1793
1794         if video_info.get('license_info'):
1795             raise ExtractorError('This video is DRM protected.', expected=True)
1796
1797         video_details = try_get(
1798             player_response, lambda x: x['videoDetails'], dict) or {}
1799
1800         # title
1801         if 'title' in video_info:
1802             video_title = video_info['title'][0]
1803         elif 'title' in player_response:
1804             video_title = video_details['title']
1805         else:
1806             self._downloader.report_warning('Unable to extract video title')
1807             video_title = '_'
1808
1809         # description
1810         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1811         if video_description:
1812
1813             def replace_url(m):
1814                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1815                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1816                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1817                     qs = compat_parse_qs(parsed_redir_url.query)
1818                     q = qs.get('q')
1819                     if q and q[0]:
1820                         return q[0]
1821                 return redir_url
1822
1823             description_original = video_description = re.sub(r'''(?x)
1824                 <a\s+
1825                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1826                     (?:title|href)="([^"]+)"\s+
1827                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1828                     class="[^"]*"[^>]*>
1829                 [^<]+\.{3}\s*
1830                 </a>
1831             ''', replace_url, video_description)
1832             video_description = clean_html(video_description)
1833         else:
1834             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1835             if fd_mobj:
1836                 video_description = unescapeHTML(fd_mobj.group(1))
1837             else:
1838                 video_description = ''
1839
1840         if not smuggled_data.get('force_singlefeed', False):
1841             if not self._downloader.params.get('noplaylist'):
1842                 multifeed_metadata_list = try_get(
1843                     player_response,
1844                     lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1845                     compat_str) or try_get(
1846                     video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1847                 if multifeed_metadata_list:
1848                     entries = []
1849                     feed_ids = []
1850                     for feed in multifeed_metadata_list.split(','):
1851                         # Unquote should take place before split on comma (,) since textual
1852                         # fields may contain comma as well (see
1853                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
1854                         feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1855                         entries.append({
1856                             '_type': 'url_transparent',
1857                             'ie_key': 'Youtube',
1858                             'url': smuggle_url(
1859                                 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1860                                 {'force_singlefeed': True}),
1861                             'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1862                         })
1863                         feed_ids.append(feed_data['id'][0])
1864                     self.to_screen(
1865                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1866                         % (', '.join(feed_ids), video_id))
1867                     return self.playlist_result(entries, video_id, video_title, video_description)
1868             else:
1869                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1870
1871         if view_count is None:
1872             view_count = extract_view_count(video_info)
1873         if view_count is None and video_details:
1874             view_count = int_or_none(video_details.get('viewCount'))
1875
1876         # Check for "rental" videos
1877         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1878             raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1879
1880         def _extract_filesize(media_url):
1881             return int_or_none(self._search_regex(
1882                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1883
1884         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1885             self.report_rtmp_download()
1886             formats = [{
1887                 'format_id': '_rtmp',
1888                 'protocol': 'rtmp',
1889                 'url': video_info['conn'][0],
1890                 'player_url': player_url,
1891             }]
1892         elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1893             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1894             if 'rtmpe%3Dyes' in encoded_url_map:
1895                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1896             formats_spec = {}
1897             fmt_list = video_info.get('fmt_list', [''])[0]
1898             if fmt_list:
1899                 for fmt in fmt_list.split(','):
1900                     spec = fmt.split('/')
1901                     if len(spec) > 1:
1902                         width_height = spec[1].split('x')
1903                         if len(width_height) == 2:
1904                             formats_spec[spec[0]] = {
1905                                 'resolution': spec[1],
1906                                 'width': int_or_none(width_height[0]),
1907                                 'height': int_or_none(width_height[1]),
1908                             }
1909             q = qualities(['small', 'medium', 'hd720'])
1910             streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
1911             if streaming_formats:
1912                 for fmt in streaming_formats:
1913                     itag = str_or_none(fmt.get('itag'))
1914                     if not itag:
1915                         continue
1916                     quality = fmt.get('quality')
1917                     quality_label = fmt.get('qualityLabel') or quality
1918                     formats_spec[itag] = {
1919                         'asr': int_or_none(fmt.get('audioSampleRate')),
1920                         'filesize': int_or_none(fmt.get('contentLength')),
1921                         'format_note': quality_label,
1922                         'fps': int_or_none(fmt.get('fps')),
1923                         'height': int_or_none(fmt.get('height')),
1924                         'quality': q(quality),
1925                         # bitrate for itag 43 is always 2147483647
1926                         'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1927                         'width': int_or_none(fmt.get('width')),
1928                     }
1929             formats = []
1930             for url_data_str in encoded_url_map.split(','):
1931                 url_data = compat_parse_qs(url_data_str)
1932                 if 'itag' not in url_data or 'url' not in url_data:
1933                     continue
1934                 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1935                 # Unsupported FORMAT_STREAM_TYPE_OTF
1936                 if stream_type == 3:
1937                     continue
1938                 format_id = url_data['itag'][0]
1939                 url = url_data['url'][0]
1940
1941                 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1942                     ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1943                     jsplayer_url_json = self._search_regex(
1944                         ASSETS_RE,
1945                         embed_webpage if age_gate else video_webpage,
1946                         'JS player URL (1)', default=None)
1947                     if not jsplayer_url_json and not age_gate:
1948                         # We need the embed website after all
1949                         if embed_webpage is None:
1950                             embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1951                             embed_webpage = self._download_webpage(
1952                                 embed_url, video_id, 'Downloading embed webpage')
1953                         jsplayer_url_json = self._search_regex(
1954                             ASSETS_RE, embed_webpage, 'JS player URL')
1955
1956                     player_url = json.loads(jsplayer_url_json)
1957                     if player_url is None:
1958                         player_url_json = self._search_regex(
1959                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1960                             video_webpage, 'age gate player URL')
1961                         player_url = json.loads(player_url_json)
1962
1963                 if 'sig' in url_data:
1964                     url += '&signature=' + url_data['sig'][0]
1965                 elif 's' in url_data:
1966                     encrypted_sig = url_data['s'][0]
1967
1968                     if self._downloader.params.get('verbose'):
1969                         if player_url is None:
1970                             player_version = 'unknown'
1971                             player_desc = 'unknown'
1972                         else:
1973                             if player_url.endswith('swf'):
1974                                 player_version = self._search_regex(
1975                                     r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1976                                     'flash player', fatal=False)
1977                                 player_desc = 'flash player %s' % player_version
1978                             else:
1979                                 player_version = self._search_regex(
1980                                     [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
1981                                      r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
1982                                     player_url,
1983                                     'html5 player', fatal=False)
1984                                 player_desc = 'html5 player %s' % player_version
1985
1986                         parts_sizes = self._signature_cache_id(encrypted_sig)
1987                         self.to_screen('{%s} signature length %s, %s' %
1988                                        (format_id, parts_sizes, player_desc))
1989
1990                     signature = self._decrypt_signature(
1991                         encrypted_sig, video_id, player_url, age_gate)
1992                     url += '&signature=' + signature
1993                 if 'ratebypass' not in url:
1994                     url += '&ratebypass=yes'
1995
1996                 dct = {
1997                     'format_id': format_id,
1998                     'url': url,
1999                     'player_url': player_url,
2000                 }
2001                 if format_id in self._formats:
2002                     dct.update(self._formats[format_id])
2003                 if format_id in formats_spec:
2004                     dct.update(formats_spec[format_id])
2005
2006                 # Some itags are not included in DASH manifest thus corresponding formats will
2007                 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2008                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2009                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2010                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2011
2012                 filesize = int_or_none(url_data.get(
2013                     'clen', [None])[0]) or _extract_filesize(url)
2014
2015                 quality = url_data.get('quality', [None])[0]
2016
2017                 more_fields = {
2018                     'filesize': filesize,
2019                     'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
2020                     'width': width,
2021                     'height': height,
2022                     'fps': int_or_none(url_data.get('fps', [None])[0]),
2023                     'format_note': url_data.get('quality_label', [None])[0] or quality,
2024                     'quality': q(quality),
2025                 }
2026                 for key, value in more_fields.items():
2027                     if value:
2028                         dct[key] = value
2029                 type_ = url_data.get('type', [None])[0]
2030                 if type_:
2031                     type_split = type_.split(';')
2032                     kind_ext = type_split[0].split('/')
2033                     if len(kind_ext) == 2:
2034                         kind, _ = kind_ext
2035                         dct['ext'] = mimetype2ext(type_split[0])
2036                         if kind in ('audio', 'video'):
2037                             codecs = None
2038                             for mobj in re.finditer(
2039                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2040                                 if mobj.group('key') == 'codecs':
2041                                     codecs = mobj.group('val')
2042                                     break
2043                             if codecs:
2044                                 dct.update(parse_codecs(codecs))
2045                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2046                     dct['downloader_options'] = {
2047                         # Youtube throttles chunks >~10M
2048                         'http_chunk_size': 10485760,
2049                     }
2050                 formats.append(dct)
2051         else:
2052             manifest_url = (
2053                 url_or_none(try_get(
2054                     player_response,
2055                     lambda x: x['streamingData']['hlsManifestUrl'],
2056                     compat_str)) or
2057                 url_or_none(try_get(
2058                     video_info, lambda x: x['hlsvp'][0], compat_str)))
2059             if manifest_url:
2060                 formats = []
2061                 m3u8_formats = self._extract_m3u8_formats(
2062                     manifest_url, video_id, 'mp4', fatal=False)
2063                 for a_format in m3u8_formats:
2064                     itag = self._search_regex(
2065                         r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2066                     if itag:
2067                         a_format['format_id'] = itag
2068                         if itag in self._formats:
2069                             dct = self._formats[itag].copy()
2070                             dct.update(a_format)
2071                             a_format = dct
2072                     a_format['player_url'] = player_url
2073                     # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2074                     a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2075                     formats.append(a_format)
2076             else:
2077                 error_message = clean_html(video_info.get('reason', [None])[0])
2078                 if not error_message:
2079                     error_message = extract_unavailable_message()
2080                 if error_message:
2081                     raise ExtractorError(error_message, expected=True)
2082                 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2083
2084         # uploader
2085         video_uploader = try_get(
2086             video_info, lambda x: x['author'][0],
2087             compat_str) or str_or_none(video_details.get('author'))
2088         if video_uploader:
2089             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2090         else:
2091             self._downloader.report_warning('unable to extract uploader name')
2092
2093         # uploader_id
2094         video_uploader_id = None
2095         video_uploader_url = None
2096         mobj = re.search(
2097             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2098             video_webpage)
2099         if mobj is not None:
2100             video_uploader_id = mobj.group('uploader_id')
2101             video_uploader_url = mobj.group('uploader_url')
2102         else:
2103             self._downloader.report_warning('unable to extract uploader nickname')
2104
2105         channel_id = self._html_search_meta(
2106             'channelId', video_webpage, 'channel id')
2107         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2108
2109         # thumbnail image
2110         # We try first to get a high quality image:
2111         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2112                             video_webpage, re.DOTALL)
2113         if m_thumb is not None:
2114             video_thumbnail = m_thumb.group(1)
2115         elif 'thumbnail_url' not in video_info:
2116             self._downloader.report_warning('unable to extract video thumbnail')
2117             video_thumbnail = None
2118         else:   # don't panic if we can't find it
2119             video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2120
2121         # upload date
2122         upload_date = self._html_search_meta(
2123             'datePublished', video_webpage, 'upload date', default=None)
2124         if not upload_date:
2125             upload_date = self._search_regex(
2126                 [r'(?s)id="eow-date.*?>(.*?)</span>',
2127                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2128                 video_webpage, 'upload date', default=None)
2129         upload_date = unified_strdate(upload_date)
2130
2131         video_license = self._html_search_regex(
2132             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2133             video_webpage, 'license', default=None)
2134
2135         m_music = re.search(
2136             r'''(?x)
2137                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2138                 <ul[^>]*>\s*
2139                 <li>(?P<title>.+?)
2140                 by (?P<creator>.+?)
2141                 (?:
2142                     \(.+?\)|
2143                     <a[^>]*
2144                         (?:
2145                             \bhref=["\']/red[^>]*>|             # drop possible
2146                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
2147                         )
2148                     .*?
2149                 )?</li
2150             ''',
2151             video_webpage)
2152         if m_music:
2153             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2154             video_creator = clean_html(m_music.group('creator'))
2155         else:
2156             video_alt_title = video_creator = None
2157
2158         def extract_meta(field):
2159             return self._html_search_regex(
2160                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2161                 video_webpage, field, default=None)
2162
2163         track = extract_meta('Song')
2164         artist = extract_meta('Artist')
2165         album = extract_meta('Album')
2166
2167         # Youtube Music Auto-generated description
2168         release_date = release_year = None
2169         if video_description:
2170             mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2171             if mobj:
2172                 if not track:
2173                     track = mobj.group('track').strip()
2174                 if not artist:
2175                     artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2176                 if not album:
2177                     album = mobj.group('album'.strip())
2178                 release_year = mobj.group('release_year')
2179                 release_date = mobj.group('release_date')
2180                 if release_date:
2181                     release_date = release_date.replace('-', '')
2182                     if not release_year:
2183                         release_year = int(release_date[:4])
2184                 if release_year:
2185                     release_year = int(release_year)
2186
2187         m_episode = re.search(
2188             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2189             video_webpage)
2190         if m_episode:
2191             series = unescapeHTML(m_episode.group('series'))
2192             season_number = int(m_episode.group('season'))
2193             episode_number = int(m_episode.group('episode'))
2194         else:
2195             series = season_number = episode_number = None
2196
2197         m_cat_container = self._search_regex(
2198             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2199             video_webpage, 'categories', default=None)
2200         if m_cat_container:
2201             category = self._html_search_regex(
2202                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2203                 default=None)
2204             video_categories = None if category is None else [category]
2205         else:
2206             video_categories = None
2207
2208         video_tags = [
2209             unescapeHTML(m.group('content'))
2210             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2211
2212         def _extract_count(count_name):
2213             return str_to_int(self._search_regex(
2214                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2215                 % re.escape(count_name),
2216                 video_webpage, count_name, default=None))
2217
2218         like_count = _extract_count('like')
2219         dislike_count = _extract_count('dislike')
2220
2221         if view_count is None:
2222             view_count = str_to_int(self._search_regex(
2223                 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2224                 'view count', default=None))
2225
2226         # subtitles
2227         video_subtitles = self.extract_subtitles(video_id, video_webpage)
2228         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2229
2230         video_duration = try_get(
2231             video_info, lambda x: int_or_none(x['length_seconds'][0]))
2232         if not video_duration:
2233             video_duration = int_or_none(video_details.get('lengthSeconds'))
2234         if not video_duration:
2235             video_duration = parse_duration(self._html_search_meta(
2236                 'duration', video_webpage, 'video duration'))
2237
2238         # annotations
2239         video_annotations = None
2240         if self._downloader.params.get('writeannotations', False):
2241             video_annotations = self._extract_annotations(video_id)
2242
2243         chapters = self._extract_chapters(description_original, video_duration)
2244
2245         # Look for the DASH manifest
2246         if self._downloader.params.get('youtube_include_dash_manifest', True):
2247             dash_mpd_fatal = True
2248             for mpd_url in dash_mpds:
2249                 dash_formats = {}
2250                 try:
2251                     def decrypt_sig(mobj):
2252                         s = mobj.group(1)
2253                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2254                         return '/signature/%s' % dec_s
2255
2256                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2257
2258                     for df in self._extract_mpd_formats(
2259                             mpd_url, video_id, fatal=dash_mpd_fatal,
2260                             formats_dict=self._formats):
2261                         if not df.get('filesize'):
2262                             df['filesize'] = _extract_filesize(df['url'])
2263                         # Do not overwrite DASH format found in some previous DASH manifest
2264                         if df['format_id'] not in dash_formats:
2265                             dash_formats[df['format_id']] = df
2266                         # Additional DASH manifests may end up in HTTP Error 403 therefore
2267                         # allow them to fail without bug report message if we already have
2268                         # some DASH manifest succeeded. This is temporary workaround to reduce
2269                         # burst of bug reports until we figure out the reason and whether it
2270                         # can be fixed at all.
2271                         dash_mpd_fatal = False
2272                 except (ExtractorError, KeyError) as e:
2273                     self.report_warning(
2274                         'Skipping DASH manifest: %r' % e, video_id)
2275                 if dash_formats:
2276                     # Remove the formats we found through non-DASH, they
2277                     # contain less info and it can be wrong, because we use
2278                     # fixed values (for example the resolution). See
2279                     # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2280                     # example.
2281                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2282                     formats.extend(dash_formats.values())
2283
2284         # Check for malformed aspect ratio
2285         stretched_m = re.search(
2286             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2287             video_webpage)
2288         if stretched_m:
2289             w = float(stretched_m.group('w'))
2290             h = float(stretched_m.group('h'))
2291             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2292             # We will only process correct ratios.
2293             if w > 0 and h > 0:
2294                 ratio = w / h
2295                 for f in formats:
2296                     if f.get('vcodec') != 'none':
2297                         f['stretched_ratio'] = ratio
2298
2299         self._sort_formats(formats)
2300
2301         self.mark_watched(video_id, video_info, player_response)
2302
2303         return {
2304             'id': video_id,
2305             'uploader': video_uploader,
2306             'uploader_id': video_uploader_id,
2307             'uploader_url': video_uploader_url,
2308             'channel_id': channel_id,
2309             'channel_url': channel_url,
2310             'upload_date': upload_date,
2311             'license': video_license,
2312             'creator': video_creator or artist,
2313             'title': video_title,
2314             'alt_title': video_alt_title or track,
2315             'thumbnail': video_thumbnail,
2316             'description': video_description,
2317             'categories': video_categories,
2318             'tags': video_tags,
2319             'subtitles': video_subtitles,
2320             'automatic_captions': automatic_captions,
2321             'duration': video_duration,
2322             'age_limit': 18 if age_gate else 0,
2323             'annotations': video_annotations,
2324             'chapters': chapters,
2325             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2326             'view_count': view_count,
2327             'like_count': like_count,
2328             'dislike_count': dislike_count,
2329             'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
2330             'formats': formats,
2331             'is_live': is_live,
2332             'start_time': start_time,
2333             'end_time': end_time,
2334             'series': series,
2335             'season_number': season_number,
2336             'episode_number': episode_number,
2337             'track': track,
2338             'artist': artist,
2339             'album': album,
2340             'release_date': release_date,
2341             'release_year': release_year,
2342         }
2343
2344
2345 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2346     IE_DESC = 'YouTube.com playlists'
2347     _VALID_URL = r"""(?x)(?:
2348                         (?:https?://)?
2349                         (?:\w+\.)?
2350                         (?:
2351                             (?:
2352                                 youtube\.com|
2353                                 invidio\.us
2354                             )
2355                             /
2356                             (?:
2357                                (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2358                                \? (?:.*?[&;])*? (?:p|a|list)=
2359                             |  p/
2360                             )|
2361                             youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2362                         )
2363                         (
2364                             (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2365                             # Top tracks, they can also include dots
2366                             |(?:MC)[\w\.]*
2367                         )
2368                         .*
2369                      |
2370                         (%(playlist_id)s)
2371                      )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2372     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2373     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
2374     IE_NAME = 'youtube:playlist'
2375     _TESTS = [{
2376         'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2377         'info_dict': {
2378             'title': 'ytdl test PL',
2379             'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2380         },
2381         'playlist_count': 3,
2382     }, {
2383         'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2384         'info_dict': {
2385             'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2386             'title': 'YDL_Empty_List',
2387         },
2388         'playlist_count': 0,
2389         'skip': 'This playlist is private',
2390     }, {
2391         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2392         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2393         'info_dict': {
2394             'title': '29C3: Not my department',
2395             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2396         },
2397         'playlist_count': 95,
2398     }, {
2399         'note': 'issue #673',
2400         'url': 'PLBB231211A4F62143',
2401         'info_dict': {
2402             'title': '[OLD]Team Fortress 2 (Class-based LP)',
2403             'id': 'PLBB231211A4F62143',
2404         },
2405         'playlist_mincount': 26,
2406     }, {
2407         'note': 'Large playlist',
2408         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2409         'info_dict': {
2410             'title': 'Uploads from Cauchemar',
2411             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2412         },
2413         'playlist_mincount': 799,
2414     }, {
2415         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2416         'info_dict': {
2417             'title': 'YDL_safe_search',
2418             'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2419         },
2420         'playlist_count': 2,
2421         'skip': 'This playlist is private',
2422     }, {
2423         'note': 'embedded',
2424         'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2425         'playlist_count': 4,
2426         'info_dict': {
2427             'title': 'JODA15',
2428             'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2429         }
2430     }, {
2431         'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2432         'playlist_mincount': 485,
2433         'info_dict': {
2434             'title': '2017 華語最新單曲 (2/24更新)',
2435             'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2436         }
2437     }, {
2438         'note': 'Embedded SWF player',
2439         'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2440         'playlist_count': 4,
2441         'info_dict': {
2442             'title': 'JODA7',
2443             'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2444         }
2445     }, {
2446         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2447         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2448         'info_dict': {
2449             'title': 'Uploads from Interstellar Movie',
2450             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2451         },
2452         'playlist_mincount': 21,
2453     }, {
2454         # Playlist URL that does not actually serve a playlist
2455         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2456         'info_dict': {
2457             'id': 'FqZTN594JQw',
2458             'ext': 'webm',
2459             'title': "Smiley's People 01 detective, Adventure Series, Action",
2460             'uploader': 'STREEM',
2461             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2462             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2463             'upload_date': '20150526',
2464             'license': 'Standard YouTube License',
2465             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2466             'categories': ['People & Blogs'],
2467             'tags': list,
2468             'view_count': int,
2469             'like_count': int,
2470             'dislike_count': int,
2471         },
2472         'params': {
2473             'skip_download': True,
2474         },
2475         'add_ie': [YoutubeIE.ie_key()],
2476     }, {
2477         'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2478         'info_dict': {
2479             'id': 'yeWKywCrFtk',
2480             'ext': 'mp4',
2481             'title': 'Small Scale Baler and Braiding Rugs',
2482             'uploader': 'Backus-Page House Museum',
2483             'uploader_id': 'backuspagemuseum',
2484             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2485             'upload_date': '20161008',
2486             'license': 'Standard YouTube License',
2487             'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2488             'categories': ['Nonprofits & Activism'],
2489             'tags': list,
2490             'like_count': int,
2491             'dislike_count': int,
2492         },
2493         'params': {
2494             'noplaylist': True,
2495             'skip_download': True,
2496         },
2497     }, {
2498         'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2499         'only_matching': True,
2500     }, {
2501         'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2502         'only_matching': True,
2503     }, {
2504         # music album playlist
2505         'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2506         'only_matching': True,
2507     }, {
2508         'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2509         'only_matching': True,
2510     }]
2511
    def _real_initialize(self):
        """Run ``self._login()`` as this extractor's initialization step."""
        self._login()
2514
2515     def _extract_mix(self, playlist_id):
2516         # The mixes are generated from a single video
2517         # the id of the playlist is just 'RD' + video_id
2518         ids = []
2519         last_id = playlist_id[-11:]
2520         for n in itertools.count(1):
2521             url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2522             webpage = self._download_webpage(
2523                 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2524             new_ids = orderedSet(re.findall(
2525                 r'''(?xs)data-video-username=".*?".*?
2526                            href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
2527                 webpage))
2528             # Fetch new pages until all the videos are repeated, it seems that
2529             # there are always 51 unique videos.
2530             new_ids = [_id for _id in new_ids if _id not in ids]
2531             if not new_ids:
2532                 break
2533             ids.extend(new_ids)
2534             last_id = ids[-1]
2535
2536         url_results = self._ids_to_results(ids)
2537
2538         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2539         title_span = (
2540             search_title('playlist-title') or
2541             search_title('title long-title') or
2542             search_title('title'))
2543         title = clean_html(title_span)
2544
2545         return self.playlist_result(url_results, playlist_id, title)
2546
2547     def _extract_playlist(self, playlist_id):
2548         url = self._TEMPLATE_URL % playlist_id
2549         page = self._download_webpage(url, playlist_id)
2550
2551         # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2552         for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2553             match = match.strip()
2554             # Check if the playlist exists or is private
2555             mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2556             if mobj:
2557                 reason = mobj.group('reason')
2558                 message = 'This playlist %s' % reason
2559                 if 'private' in reason:
2560                     message += ', use --username or --netrc to access it'
2561                 message += '.'
2562                 raise ExtractorError(message, expected=True)
2563             elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2564                 raise ExtractorError(
2565                     'Invalid parameters. Maybe URL is incorrect.',
2566                     expected=True)
2567             elif re.match(r'[^<]*Choose your language[^<]*', match):
2568                 continue
2569             else:
2570                 self.report_warning('Youtube gives an alert message: ' + match)
2571
2572         playlist_title = self._html_search_regex(
2573             r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2574             page, 'title', default=None)
2575
2576         _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2577         uploader = self._search_regex(
2578             r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2579             page, 'uploader', default=None)
2580         mobj = re.search(
2581             r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2582             page)
2583         if mobj:
2584             uploader_id = mobj.group('uploader_id')
2585             uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2586         else:
2587             uploader_id = uploader_url = None
2588
2589         has_videos = True
2590
2591         if not playlist_title:
2592             try:
2593                 # Some playlist URLs don't actually serve a playlist (e.g.
2594                 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2595                 next(self._entries(page, playlist_id))
2596             except StopIteration:
2597                 has_videos = False
2598
2599         playlist = self.playlist_result(
2600             self._entries(page, playlist_id), playlist_id, playlist_title)
2601         playlist.update({
2602             'uploader': uploader,
2603             'uploader_id': uploader_id,
2604             'uploader_url': uploader_url,
2605         })
2606
2607         return has_videos, playlist
2608
2609     def _check_download_just_video(self, url, playlist_id):
2610         # Check if it's a video-specific URL
2611         query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2612         video_id = query_dict.get('v', [None])[0] or self._search_regex(
2613             r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2614             'video id', default=None)
2615         if video_id:
2616             if self._downloader.params.get('noplaylist'):
2617                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2618                 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2619             else:
2620                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2621                 return video_id, None
2622         return None, None
2623
2624     def _real_extract(self, url):
2625         # Extract playlist id
2626         mobj = re.match(self._VALID_URL, url)
2627         if mobj is None:
2628             raise ExtractorError('Invalid URL: %s' % url)
2629         playlist_id = mobj.group(1) or mobj.group(2)
2630
2631         video_id, video = self._check_download_just_video(url, playlist_id)
2632         if video:
2633             return video
2634
2635         if playlist_id.startswith(('RD', 'UL', 'PU')):
2636             # Mixes require a custom extraction process
2637             return self._extract_mix(playlist_id)
2638
2639         has_videos, playlist = self._extract_playlist(playlist_id)
2640         if has_videos or not video_id:
2641             return playlist
2642
2643         # Some playlist URLs don't actually serve a playlist (see
2644         # https://github.com/ytdl-org/youtube-dl/issues/10537).
2645         # Fallback to plain video extraction if there is a video id
2646         # along with playlist id.
2647         return self.url_result(video_id, 'Youtube', video_id=video_id)
2648
2649
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for channel URLs of the form ``/channel/<id>``."""
    IE_NAME = 'youtube:channel'
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that the playlists or live extractors accept."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the channel's videos page URL for ``channel_id``."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract all videos of a channel.

        Prefers delegating to the channel's uploads playlist ('UU' + id)
        and falls back to scraping the channel's videos page.
        """
        channel_id = self._match_id(url)
        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)

        channel_playlist_id = False
        if channel_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                channel_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if channel_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        channel_url, 'channel id', default=None)

        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            uploads_id = 'UU' + channel_playlist_id[2:]
            uploads_url = compat_urlparse.urljoin(url, '/playlist?list=%s' % uploads_id)
            return self.url_result(uploads_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated_marker = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_marker is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        # Probe for a first entry; when there is none, surface any alert
        # shown on the page as an expected error.
        nothing = object()
        if next(self._entries(channel_page, channel_id), nothing) is nothing:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2742
2743
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for user pages (``/user/<name>``, ``/c/<name>``, ``ytuser:``)."""
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` only if no other Youtube*IE in this module does."""
        # Don't return True if the url can be extracted with another youtube
        # extractor: this regex is too permissive and would match as well.
        for name, klass in globals().items():
            if klass is cls:
                continue
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos page URL, defaulting the path kind to 'user'."""
        url_match = re.match(self._VALID_URL, url)
        path_kind = url_match.group('user') or 'user'
        user_name = url_match.group('id')
        return self._TEMPLATE_URL % (path_kind, user_name)
2794
2795
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for ``.../live`` URLs of users and channels."""
    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a live URL to its video, or fall back to the base URL.

        The video result is returned only when the page's og:type starts
        with 'video' and its 'videoId' meta is an 11-character id.
        """
        url_match = re.match(self._VALID_URL, url)
        channel_id, base_url = url_match.group('id', 'base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        is_video_page = (
            page_type.startswith('video')
            and video_id
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if is_video_page:
            return self.url_result(video_id, YoutubeIE.ie_key())

        return self.url_result(base_url)
2846
2847
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the ``/playlists`` page of a user or channel."""
    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
2876
2877
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    # Matches an href pointing at /watch?v=<11-character id> ("id" group);
    # when a title="..." attribute follows within the same tag, its value is
    # captured as the optional "title" group.
    # NOTE(review): presumably consumed by the base class when scanning
    # result pages - confirm against YoutubePlaylistBaseInfoExtractor.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
2880
2881
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Search extractor behind the ``ytsearch`` key."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string arguments merged into the search request
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are downloaded one after another until a page yields
        no videos, no "Next" link is found, or at least ``n`` videos have
        been collected.

        ``n`` is not necessarily an int (``_MAX_RESULTS`` is
        ``float('inf')`` and is presumably passed here for unbounded
        searches), so the final truncation is guarded by a comparison
        instead of slicing unconditionally.

        Raises ExtractorError (expected) when a page contains a search
        message instead of results.
        """
        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough results are in hand: with a strict
            # '>' an exact hit of n would trigger one more, useless page
            # download (which could even fail the whole search).
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # Only slice when there is an overshoot; n may be float('inf'),
        # which is not a valid slice bound.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
2930
2931
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of the search extractor that sorts by upload date."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Merged into the search query string by the inherited _get_n_results
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
2937
2938
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extractor for ``/results?search_query=...`` (or ``q=...``) URLs."""
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the results page and return its videos as a playlist.

        The playlist is titled with the URL-decoded search query.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        webpage = self._download_webpage(url, query)
        entries = self._process_page(webpage)
        return self.playlist_result(entries, playlist_title=query)
2959
2960
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for ``/show/<id>`` URLs."""
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Delegate to the parent extractor using the show's playlists URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
2978
2979
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Common base for the feed extractors.

    Concrete subclasses have to provide ``_FEED_NAME`` (used in the feed
    URL and in ``IE_NAME``) and ``_PLAYLIST_TITLE``.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Run ``self._login()`` as the initialization step."""
        self._login()

    def _entries(self, page):
        """Yield a result for every new video id found in the feed.

        Follows the "load more" link page by page and stops when a page
        brings no unseen ids or no further link is present.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        seen_ids = []
        widget_html = listing_html = page
        for page_num in itertools.count(1):
            found = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', listing_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            fresh_ids = [
                video_id for video_id in orderedSet(found)
                if video_id not in seen_ids]
            if not fresh_ids:
                break

            seen_ids.extend(fresh_ids)

            for entry in self._ids_to_results(fresh_ids):
                yield entry

            more_match = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if not more_match:
                break

            more_url = 'https://youtube.com/%s' % more_match.group('more')
            more = self._download_json(
                more_url, self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            listing_html = more['content_html']
            widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and wrap its entries in a playlist."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        page = self._download_webpage(feed_url, self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
3031
3032
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the 'WL' (watch later) playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video if one was requested, else the WL playlist."""
        single_video = self._check_download_just_video(url, 'WL')[1]
        if single_video:
            return single_video
        return self._extract_playlist('WL')[1]
3052
3053
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor resolving the favourites page to its playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the playlist id on the favourites page and delegate to it."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
3064
3065
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
3071
3072
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
3078
3079
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed."""
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
3085
3086
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch/attribution URLs that carry no video id.

    Extraction always fails with a hint about quoting the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the problem."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
3134
3135
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose video id is shorter than 11 characters.

    Extraction always fails, reporting the incomplete id.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the partial id."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)