[youtube] Improve extraction robustness
[ytdl] youtube_dl/extractor/youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5
6 import itertools
7 import json
8 import os.path
9 import random
10 import re
11 import time
12 import traceback
13
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
18     compat_chr,
19     compat_kwargs,
20     compat_parse_qs,
21     compat_urllib_parse_unquote,
22     compat_urllib_parse_unquote_plus,
23     compat_urllib_parse_urlencode,
24     compat_urllib_parse_urlparse,
25     compat_urlparse,
26     compat_str,
27 )
28 from ..utils import (
29     clean_html,
30     dict_get,
31     error_to_compat_str,
32     ExtractorError,
33     float_or_none,
34     get_element_by_attribute,
35     get_element_by_id,
36     int_or_none,
37     mimetype2ext,
38     orderedSet,
39     parse_codecs,
40     parse_duration,
41     qualities,
42     remove_quotes,
43     remove_start,
44     smuggle_url,
45     str_or_none,
46     str_to_int,
47     try_get,
48     unescapeHTML,
49     unified_strdate,
50     unsmuggle_url,
51     uppercase_escape,
52     url_or_none,
53     urlencode_postdata,
54 )
55
56
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # JSON endpoints of Google's web sign-in flow, driven by _login() below.
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the "TL" token extracted from the challenge response.
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Known playlist id prefixes (regular PL, liked LL, uploads UU, favorites
    # FL, mixes RD, auto-generated albums OLAK5uy_, ...), followed by the
    # base64-ish id body.
    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    def _set_language(self):
        # Pin the UI language to English via the PREF cookie so that the
        # regex-based scraping downstream sees stable markup/strings.
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        # Wrap bare video ids into url_result dicts to be resolved by the
        # 'Youtube' extractor.
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            # A cookie file can substitute for credentials when login is
            # required.
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # NOTE(review): returns None (falsy) here while other failure
            # paths return False explicitly; callers only test truthiness,
            # so this is harmless but inconsistent.
            return

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST one step of the sign-in flow.  The payload combines the
            # hidden form inputs with Google's fixed flow parameters; f_req
            # is a positional JSON array whose slot meanings are not publicly
            # documented (reverse engineered — do not reorder).
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # The response body carries an anti-JSON prefix before the
                # first '['; strip everything up to it before parsing.
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Step 1: look up the account by username.  The array layout mirrors
        # what the web client sends; only the username slots are meaningful
        # to us.
        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        # The opaque account token lives at [0][2] of the lookup response
        # (reverse engineered position).
        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        # Step 2: submit the password for the looked-up account.
        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # NOTE(review): bare return (None) — same inconsistency as above.
            return

        # A non-empty entry at [0][5] signals a login error.
        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # NOTE(review): due to operator precedence this evaluates as
            # ('Unable to login: %s' % 'Invalid password') if ... else login_msg,
            # so for any other login_msg the bare message is warned without
            # the 'Unable to login:' prefix — looks like missing parentheses.
            warn(
                'Unable to login: %s' % 'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        # If present, the server is asking for an additional challenge
        # (two-step verification or an interactive one we cannot solve).
        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                # "TL" token required to address the TFA endpoint.
                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Users often paste codes with the SMS "G-" prefix; strip it.
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # NOTE(review): same precedence issue as the login_msg
                    # warning above.
                    warn(
                        'Unable to finish TFA: %s' % 'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Interactive challenges we cannot answer automatically;
                # point the user at a browser instead.
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        # Final step: fetch the CheckCookie URL so the session cookies get
        # set; a redirect to myaccount.google.com indicates success.
        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        # Append disable_polymer=true to every request so YouTube serves the
        # old-style (non-polymer) markup that these extractors scrape.
        query = kwargs.get('query', {}).copy()
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        if self._downloader is None:
            return
        self._set_language()
        # Login failure is non-fatal here; _login() has already emitted a
        # warning for every failure path.
        if not self._login():
            return
277
278
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    """Base class for listings paged via a "Load more" button."""

    def _entries(self, page, playlist_id):
        """Yield entries from *page*, following "Load more" AJAX links
        until no further link is present or an empty page is returned."""
        content_html = page
        more_widget_html = page
        page_num = 0
        while True:
            page_num += 1
            # Emit everything found on the current chunk of HTML first.
            for entry in self._process_page(content_html):
                yield entry

            next_link = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
                more_widget_html)
            if next_link is None:
                break

            more = self._download_json(
                'https://youtube.com/%s' % next_link.group('more'), playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            # Some webpages show a "Load more" button but they don't
            # have more videos
            if not content_html.strip():
                break
            more_widget_html = more['load_more_widget_html']
301
302
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Base class for listings whose pages contain individual videos."""

    def _process_page(self, content):
        # Turn every (id, title) pair scraped from the page into a
        # url_result entry resolved by the 'Youtube' extractor.
        for vid, title in self.extract_videos_from_page(content):
            yield self.url_result(vid, 'Youtube', vid, title)

    def extract_videos_from_page(self, page):
        """Return (video_id, title) pairs found in *page*, deduplicated by
        id; a later occurrence may supply a title an earlier one lacked."""
        seen_ids = []
        seen_titles = []
        for match in re.finditer(self._VIDEO_RE, page):
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            if 'index' in match.groupdict() and match.group('id') == '0':
                continue
            vid = match.group('id')
            title = unescapeHTML(match.group('title'))
            if title:
                title = title.strip()
            if vid in seen_ids:
                # Duplicate id: keep the first slot, but backfill a missing
                # title if this occurrence has one.
                pos = seen_ids.index(vid)
                if title and not seen_titles[pos]:
                    seen_titles[pos] = title
            else:
                seen_ids.append(vid)
                seen_titles.append(title)
        return zip(seen_ids, seen_titles)
327
328
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Base class for pages that list playlists rather than videos."""

    def _process_page(self, content):
        # Scrape every distinct playlist id linked from a lockup title and
        # hand each one to the 'YoutubePlaylist' extractor.
        pattern = (
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>'
            r'<a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"')
        for list_id in orderedSet(re.findall(pattern, content)):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % list_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download the page and return a playlist of its playlists."""
        list_id = self._match_id(url)
        webpage = self._download_webpage(url, list_id)
        page_title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, list_id)
        return self.playlist_result(entries, list_id, page_title)
342
343
344 class YoutubeIE(YoutubeBaseInfoExtractor):
345     IE_DESC = 'YouTube.com'
346     _VALID_URL = r"""(?x)^
347                      (
348                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
349                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
350                             (?:www\.)?deturl\.com/www\.youtube\.com/|
351                             (?:www\.)?pwnyoutube\.com/|
352                             (?:www\.)?hooktube\.com/|
353                             (?:www\.)?yourepeat\.com/|
354                             tube\.majestyc\.net/|
355                             (?:(?:www|dev)\.)?invidio\.us/|
356                             (?:www\.)?invidiou\.sh/|
357                             (?:www\.)?invidious\.snopyta\.org/|
358                             (?:www\.)?invidious\.kabi\.tk/|
359                             (?:www\.)?vid\.wxzm\.sx/|
360                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
361                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
362                          (?:                                                  # the various things that can precede the ID:
363                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
364                              |(?:                                             # or the v= param in all its forms
365                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
366                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
367                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
368                                  v=
369                              )
370                          ))
371                          |(?:
372                             youtu\.be|                                        # just youtu.be/xxxx
373                             vid\.plus|                                        # or vid.plus/xxxx
374                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
375                          )/
376                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
377                          )
378                      )?                                                       # all until now is optional -> you can pass the naked ID
379                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
380                      (?!.*?\blist=
381                         (?:
382                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
383                             WL                                                # WL are handled by the watch later IE
384                         )
385                      )
386                      (?(1).+)?                                                # if we found the ID, everything can follow
387                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
388     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
389     _formats = {
390         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
391         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
392         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
393         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
394         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
395         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
396         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
397         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
398         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
399         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
400         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
401         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
402         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
403         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
404         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
405         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
406         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
407         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
408
409
410         # 3D videos
411         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
412         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
413         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
414         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
415         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
416         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
417         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
418
419         # Apple HTTP Live Streaming
420         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
421         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
422         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
423         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
424         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
425         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
426         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
427         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
428
429         # DASH mp4 video
430         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
431         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
432         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
433         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
434         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
435         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
436         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
437         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
438         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
439         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
440         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
441         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
442
443         # Dash mp4 audio
444         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
445         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
446         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
447         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
448         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
449         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
450         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
451
452         # Dash webm
453         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
454         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
455         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
456         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
457         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
458         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
459         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
460         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
461         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
462         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
463         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
464         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
465         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
466         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
467         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
468         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
469         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
470         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
471         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
472         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
473         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
474         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
475
476         # Dash webm audio
477         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
478         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
479
480         # Dash webm audio with opus inside
481         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
482         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
483         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
484
485         # RTMP (unnamed)
486         '_rtmp': {'protocol': 'rtmp'},
487     }
488     _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
489
490     _GEO_BYPASS = False
491
492     IE_NAME = 'youtube'
493     _TESTS = [
494         {
495             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
496             'info_dict': {
497                 'id': 'BaW_jenozKc',
498                 'ext': 'mp4',
499                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
500                 'uploader': 'Philipp Hagemeister',
501                 'uploader_id': 'phihag',
502                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
503                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
504                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
505                 'upload_date': '20121002',
506                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
507                 'categories': ['Science & Technology'],
508                 'tags': ['youtube-dl'],
509                 'duration': 10,
510                 'view_count': int,
511                 'like_count': int,
512                 'dislike_count': int,
513                 'start_time': 1,
514                 'end_time': 9,
515             }
516         },
517         {
518             'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
519             'note': 'Test generic use_cipher_signature video (#897)',
520             'info_dict': {
521                 'id': 'UxxajLWwzqY',
522                 'ext': 'mp4',
523                 'upload_date': '20120506',
524                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
525                 'alt_title': 'I Love It (feat. Charli XCX)',
526                 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
527                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
528                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
529                          'iconic ep', 'iconic', 'love', 'it'],
530                 'duration': 180,
531                 'uploader': 'Icona Pop',
532                 'uploader_id': 'IconaPop',
533                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
534                 'creator': 'Icona Pop',
535                 'track': 'I Love It (feat. Charli XCX)',
536                 'artist': 'Icona Pop',
537             }
538         },
539         {
540             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
541             'note': 'Test VEVO video with age protection (#956)',
542             'info_dict': {
543                 'id': '07FYdnEawAQ',
544                 'ext': 'mp4',
545                 'upload_date': '20130703',
546                 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
547                 'alt_title': 'Tunnel Vision',
548                 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
549                 'duration': 419,
550                 'uploader': 'justintimberlakeVEVO',
551                 'uploader_id': 'justintimberlakeVEVO',
552                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
553                 'creator': 'Justin Timberlake',
554                 'track': 'Tunnel Vision',
555                 'artist': 'Justin Timberlake',
556                 'age_limit': 18,
557             }
558         },
559         {
560             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
561             'note': 'Embed-only video (#1746)',
562             'info_dict': {
563                 'id': 'yZIXLfi8CZQ',
564                 'ext': 'mp4',
565                 'upload_date': '20120608',
566                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
567                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
568                 'uploader': 'SET India',
569                 'uploader_id': 'setindia',
570                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
571                 'age_limit': 18,
572             }
573         },
574         {
575             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
576             'note': 'Use the first video ID in the URL',
577             'info_dict': {
578                 'id': 'BaW_jenozKc',
579                 'ext': 'mp4',
580                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
581                 'uploader': 'Philipp Hagemeister',
582                 'uploader_id': 'phihag',
583                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
584                 'upload_date': '20121002',
585                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
586                 'categories': ['Science & Technology'],
587                 'tags': ['youtube-dl'],
588                 'duration': 10,
589                 'view_count': int,
590                 'like_count': int,
591                 'dislike_count': int,
592             },
593             'params': {
594                 'skip_download': True,
595             },
596         },
597         {
598             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
599             'note': '256k DASH audio (format 141) via DASH manifest',
600             'info_dict': {
601                 'id': 'a9LDPn-MO4I',
602                 'ext': 'm4a',
603                 'upload_date': '20121002',
604                 'uploader_id': '8KVIDEO',
605                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
606                 'description': '',
607                 'uploader': '8KVIDEO',
608                 'title': 'UHDTV TEST 8K VIDEO.mp4'
609             },
610             'params': {
611                 'youtube_include_dash_manifest': True,
612                 'format': '141',
613             },
614             'skip': 'format 141 not served anymore',
615         },
616         # DASH manifest with encrypted signature
617         {
618             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
619             'info_dict': {
620                 'id': 'IB3lcPjvWLA',
621                 'ext': 'm4a',
622                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
623                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
624                 'duration': 244,
625                 'uploader': 'AfrojackVEVO',
626                 'uploader_id': 'AfrojackVEVO',
627                 'upload_date': '20131011',
628             },
629             'params': {
630                 'youtube_include_dash_manifest': True,
631                 'format': '141/bestaudio[ext=m4a]',
632             },
633         },
634         # JS player signature function name containing $
635         {
636             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
637             'info_dict': {
638                 'id': 'nfWlot6h_JM',
639                 'ext': 'm4a',
640                 'title': 'Taylor Swift - Shake It Off',
641                 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
642                 'duration': 242,
643                 'uploader': 'TaylorSwiftVEVO',
644                 'uploader_id': 'TaylorSwiftVEVO',
645                 'upload_date': '20140818',
646                 'creator': 'Taylor Swift',
647             },
648             'params': {
649                 'youtube_include_dash_manifest': True,
650                 'format': '141/bestaudio[ext=m4a]',
651             },
652         },
653         # Controversy video
654         {
655             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
656             'info_dict': {
657                 'id': 'T4XJQO3qol8',
658                 'ext': 'mp4',
659                 'duration': 219,
660                 'upload_date': '20100909',
661                 'uploader': 'Amazing Atheist',
662                 'uploader_id': 'TheAmazingAtheist',
663                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
664                 'title': 'Burning Everyone\'s Koran',
665                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
666             }
667         },
668         # Normal age-gate video (No vevo, embed allowed)
669         {
670             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
671             'info_dict': {
672                 'id': 'HtVdAasjOgU',
673                 'ext': 'mp4',
674                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
675                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
676                 'duration': 142,
677                 'uploader': 'The Witcher',
678                 'uploader_id': 'WitcherGame',
679                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
680                 'upload_date': '20140605',
681                 'age_limit': 18,
682             },
683         },
684         # Age-gate video with encrypted signature
685         {
686             'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
687             'info_dict': {
688                 'id': '6kLq3WMV1nU',
689                 'ext': 'mp4',
690                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
691                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
692                 'duration': 246,
693                 'uploader': 'LloydVEVO',
694                 'uploader_id': 'LloydVEVO',
695                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
696                 'upload_date': '20110629',
697                 'age_limit': 18,
698             },
699         },
700         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
701         # YouTube Red ad is not captured for creator
702         {
703             'url': '__2ABJjxzNo',
704             'info_dict': {
705                 'id': '__2ABJjxzNo',
706                 'ext': 'mp4',
707                 'duration': 266,
708                 'upload_date': '20100430',
709                 'uploader_id': 'deadmau5',
710                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
711                 'creator': 'deadmau5',
712                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
713                 'uploader': 'deadmau5',
714                 'title': 'Deadmau5 - Some Chords (HD)',
715                 'alt_title': 'Some Chords',
716             },
717             'expected_warnings': [
718                 'DASH manifest missing',
719             ]
720         },
721         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
722         {
723             'url': 'lqQg6PlCWgI',
724             'info_dict': {
725                 'id': 'lqQg6PlCWgI',
726                 'ext': 'mp4',
727                 'duration': 6085,
728                 'upload_date': '20150827',
729                 'uploader_id': 'olympic',
730                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
731                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
732                 'uploader': 'Olympic',
733                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
734             },
735             'params': {
736                 'skip_download': 'requires avconv',
737             }
738         },
739         # Non-square pixels
740         {
741             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
742             'info_dict': {
743                 'id': '_b-2C3KPAM0',
744                 'ext': 'mp4',
745                 'stretched_ratio': 16 / 9.,
746                 'duration': 85,
747                 'upload_date': '20110310',
748                 'uploader_id': 'AllenMeow',
749                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
750                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
751                 'uploader': '孫ᄋᄅ',
752                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
753             },
754         },
755         # url_encoded_fmt_stream_map is empty string
756         {
757             'url': 'qEJwOuvDf7I',
758             'info_dict': {
759                 'id': 'qEJwOuvDf7I',
760                 'ext': 'webm',
761                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
762                 'description': '',
763                 'upload_date': '20150404',
764                 'uploader_id': 'spbelect',
765                 'uploader': 'Наблюдатели Петербурга',
766             },
767             'params': {
768                 'skip_download': 'requires avconv',
769             },
770             'skip': 'This live event has ended.',
771         },
772         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
773         {
774             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
775             'info_dict': {
776                 'id': 'FIl7x6_3R5Y',
777                 'ext': 'webm',
778                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
779                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
780                 'duration': 220,
781                 'upload_date': '20150625',
782                 'uploader_id': 'dorappi2000',
783                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
784                 'uploader': 'dorappi2000',
785                 'formats': 'mincount:31',
786             },
787             'skip': 'not actual anymore',
788         },
789         # DASH manifest with segment_list
790         {
791             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
792             'md5': '8ce563a1d667b599d21064e982ab9e31',
793             'info_dict': {
794                 'id': 'CsmdDsKjzN8',
795                 'ext': 'mp4',
796                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
797                 'uploader': 'Airtek',
798                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
799                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
800                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
801             },
802             'params': {
803                 'youtube_include_dash_manifest': True,
804                 'format': '135',  # bestvideo
805             },
806             'skip': 'This live event has ended.',
807         },
808         {
809             # Multifeed videos (multiple cameras), URL is for Main Camera
810             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
811             'info_dict': {
812                 'id': 'jqWvoWXjCVs',
813                 'title': 'teamPGP: Rocket League Noob Stream',
814                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
815             },
816             'playlist': [{
817                 'info_dict': {
818                     'id': 'jqWvoWXjCVs',
819                     'ext': 'mp4',
820                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
821                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
822                     'duration': 7335,
823                     'upload_date': '20150721',
824                     'uploader': 'Beer Games Beer',
825                     'uploader_id': 'beergamesbeer',
826                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
827                     'license': 'Standard YouTube License',
828                 },
829             }, {
830                 'info_dict': {
831                     'id': '6h8e8xoXJzg',
832                     'ext': 'mp4',
833                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
834                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
835                     'duration': 7337,
836                     'upload_date': '20150721',
837                     'uploader': 'Beer Games Beer',
838                     'uploader_id': 'beergamesbeer',
839                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
840                     'license': 'Standard YouTube License',
841                 },
842             }, {
843                 'info_dict': {
844                     'id': 'PUOgX5z9xZw',
845                     'ext': 'mp4',
846                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
847                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
848                     'duration': 7337,
849                     'upload_date': '20150721',
850                     'uploader': 'Beer Games Beer',
851                     'uploader_id': 'beergamesbeer',
852                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
853                     'license': 'Standard YouTube License',
854                 },
855             }, {
856                 'info_dict': {
857                     'id': 'teuwxikvS5k',
858                     'ext': 'mp4',
859                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
860                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
861                     'duration': 7334,
862                     'upload_date': '20150721',
863                     'uploader': 'Beer Games Beer',
864                     'uploader_id': 'beergamesbeer',
865                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
866                     'license': 'Standard YouTube License',
867                 },
868             }],
869             'params': {
870                 'skip_download': True,
871             },
872             'skip': 'This video is not available.',
873         },
874         {
875             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
876             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
877             'info_dict': {
878                 'id': 'gVfLd0zydlo',
879                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
880             },
881             'playlist_count': 2,
882             'skip': 'Not multifeed anymore',
883         },
884         {
885             'url': 'https://vid.plus/FlRa-iH7PGw',
886             'only_matching': True,
887         },
888         {
889             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
890             'only_matching': True,
891         },
892         {
893             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
894             # Also tests cut-off URL expansion in video description (see
895             # https://github.com/ytdl-org/youtube-dl/issues/1892,
896             # https://github.com/ytdl-org/youtube-dl/issues/8164)
897             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
898             'info_dict': {
899                 'id': 'lsguqyKfVQg',
900                 'ext': 'mp4',
901                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
902                 'alt_title': 'Dark Walk - Position Music',
903                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
904                 'duration': 133,
905                 'upload_date': '20151119',
906                 'uploader_id': 'IronSoulElf',
907                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
908                 'uploader': 'IronSoulElf',
909                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
910                 'track': 'Dark Walk - Position Music',
911                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
912                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
913             },
914             'params': {
915                 'skip_download': True,
916             },
917         },
918         {
919             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
920             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
921             'only_matching': True,
922         },
923         {
924             # Video with yt:stretch=17:0
925             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
926             'info_dict': {
927                 'id': 'Q39EVAstoRM',
928                 'ext': 'mp4',
929                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
930                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
931                 'upload_date': '20151107',
932                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
933                 'uploader': 'CH GAMER DROID',
934             },
935             'params': {
936                 'skip_download': True,
937             },
938             'skip': 'This video does not exist.',
939         },
940         {
941             # Video licensed under Creative Commons
942             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
943             'info_dict': {
944                 'id': 'M4gD1WSo5mA',
945                 'ext': 'mp4',
946                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
947                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
948                 'duration': 721,
949                 'upload_date': '20150127',
950                 'uploader_id': 'BerkmanCenter',
951                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
952                 'uploader': 'The Berkman Klein Center for Internet & Society',
953                 'license': 'Creative Commons Attribution license (reuse allowed)',
954             },
955             'params': {
956                 'skip_download': True,
957             },
958         },
959         {
960             # Channel-like uploader_url
961             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
962             'info_dict': {
963                 'id': 'eQcmzGIKrzg',
964                 'ext': 'mp4',
965                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
966                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
967                 'duration': 4060,
968                 'upload_date': '20151119',
969                 'uploader': 'Bernie Sanders',
970                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
971                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
972                 'license': 'Creative Commons Attribution license (reuse allowed)',
973             },
974             'params': {
975                 'skip_download': True,
976             },
977         },
978         {
979             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
980             'only_matching': True,
981         },
982         {
983             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
984             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
985             'only_matching': True,
986         },
987         {
988             # Rental video preview
989             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
990             'info_dict': {
991                 'id': 'uGpuVWrhIzE',
992                 'ext': 'mp4',
993                 'title': 'Piku - Trailer',
994                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
995                 'upload_date': '20150811',
996                 'uploader': 'FlixMatrix',
997                 'uploader_id': 'FlixMatrixKaravan',
998                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
999                 'license': 'Standard YouTube License',
1000             },
1001             'params': {
1002                 'skip_download': True,
1003             },
1004             'skip': 'This video is not available.',
1005         },
1006         {
1007             # YouTube Red video with episode data
1008             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1009             'info_dict': {
1010                 'id': 'iqKdEhx-dD4',
1011                 'ext': 'mp4',
1012                 'title': 'Isolation - Mind Field (Ep 1)',
1013                 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1014                 'duration': 2085,
1015                 'upload_date': '20170118',
1016                 'uploader': 'Vsauce',
1017                 'uploader_id': 'Vsauce',
1018                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1019                 'series': 'Mind Field',
1020                 'season_number': 1,
1021                 'episode_number': 1,
1022             },
1023             'params': {
1024                 'skip_download': True,
1025             },
1026             'expected_warnings': [
1027                 'Skipping DASH manifest',
1028             ],
1029         },
1030         {
1031             # The following content has been identified by the YouTube community
1032             # as inappropriate or offensive to some audiences.
1033             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1034             'info_dict': {
1035                 'id': '6SJNVb0GnPI',
1036                 'ext': 'mp4',
1037                 'title': 'Race Differences in Intelligence',
1038                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1039                 'duration': 965,
1040                 'upload_date': '20140124',
1041                 'uploader': 'New Century Foundation',
1042                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1043                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1044             },
1045             'params': {
1046                 'skip_download': True,
1047             },
1048         },
1049         {
1050             # itag 212
1051             'url': '1t24XAntNCY',
1052             'only_matching': True,
1053         },
1054         {
1055             # geo restricted to JP
1056             'url': 'sJL6WA-aGkQ',
1057             'only_matching': True,
1058         },
1059         {
1060             'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1061             'only_matching': True,
1062         },
1063         {
1064             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1065             'only_matching': True,
1066         },
1067         {
1068             # DRM protected
1069             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1070             'only_matching': True,
1071         },
1072         {
1073             # Video with unsupported adaptive stream type formats
1074             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1075             'info_dict': {
1076                 'id': 'Z4Vy8R84T1U',
1077                 'ext': 'mp4',
1078                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1079                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1080                 'duration': 433,
1081                 'upload_date': '20130923',
1082                 'uploader': 'Amelia Putri Harwita',
1083                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1084                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1085                 'formats': 'maxcount:10',
1086             },
1087             'params': {
1088                 'skip_download': True,
1089                 'youtube_include_dash_manifest': False,
1090             },
1091         },
1092         {
1093             # Youtube Music Auto-generated description
1094             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1095             'info_dict': {
1096                 'id': 'MgNrAu2pzNs',
1097                 'ext': 'mp4',
1098                 'title': 'Voyeur Girl',
1099                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1100                 'upload_date': '20190312',
1101                 'uploader': 'Various Artists - Topic',
1102                 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
1103                 'artist': 'Stephen',
1104                 'track': 'Voyeur Girl',
1105                 'album': 'it\'s too much love to know my dear',
1106                 'release_date': '20190313',
1107                 'release_year': 2019,
1108             },
1109             'params': {
1110                 'skip_download': True,
1111             },
1112         },
1113         {
1114             # Youtube Music Auto-generated description
1115             # Retrieve 'artist' field from 'Artist:' in video description
1116             # when it is present on youtube music video
1117             'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1118             'info_dict': {
1119                 'id': 'k0jLE7tTwjY',
1120                 'ext': 'mp4',
1121                 'title': 'Latch Feat. Sam Smith',
1122                 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1123                 'upload_date': '20150110',
1124                 'uploader': 'Various Artists - Topic',
1125                 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1126                 'artist': 'Disclosure',
1127                 'track': 'Latch Feat. Sam Smith',
1128                 'album': 'Latch Featuring Sam Smith',
1129                 'release_date': '20121008',
1130                 'release_year': 2012,
1131             },
1132             'params': {
1133                 'skip_download': True,
1134             },
1135         },
1136         {
1137             # Youtube Music Auto-generated description
1138             # handle multiple artists on youtube music video
1139             'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1140             'info_dict': {
1141                 'id': '74qn0eJSjpA',
1142                 'ext': 'mp4',
1143                 'title': 'Eastside',
1144                 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1145                 'upload_date': '20180710',
1146                 'uploader': 'Benny Blanco - Topic',
1147                 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1148                 'artist': 'benny blanco, Halsey, Khalid',
1149                 'track': 'Eastside',
1150                 'album': 'Eastside',
1151                 'release_date': '20180713',
1152                 'release_year': 2018,
1153             },
1154             'params': {
1155                 'skip_download': True,
1156             },
1157         },
1158         {
1159             # Youtube Music Auto-generated description
1160             # handle youtube music video with release_year and no release_date
1161             'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1162             'info_dict': {
1163                 'id': '-hcAI0g-f5M',
1164                 'ext': 'mp4',
1165                 'title': 'Put It On Me',
1166                 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
1167                 'upload_date': '20180426',
1168                 'uploader': 'Matt Maeson - Topic',
1169                 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1170                 'artist': 'Matt Maeson',
1171                 'track': 'Put It On Me',
1172                 'album': 'The Hearse',
1173                 'release_date': None,
1174                 'release_year': 2018,
1175             },
1176             'params': {
1177                 'skip_download': True,
1178             },
1179         },
1180     ]
1181
1182     def __init__(self, *args, **kwargs):
1183         super(YoutubeIE, self).__init__(*args, **kwargs)
1184         self._player_cache = {}
1185
1186     def report_video_info_webpage_download(self, video_id):
1187         """Report attempt to download video info webpage."""
1188         self.to_screen('%s: Downloading video info webpage' % video_id)
1189
1190     def report_information_extraction(self, video_id):
1191         """Report attempt to extract video information."""
1192         self.to_screen('%s: Extracting video information' % video_id)
1193
1194     def report_unavailable_format(self, video_id, format):
1195         """Report extracted video URL."""
1196         self.to_screen('%s: Format %s not available' % (video_id, format))
1197
1198     def report_rtmp_download(self):
1199         """Indicate the download will use the RTMP protocol."""
1200         self.to_screen('RTMP download detected')
1201
1202     def _signature_cache_id(self, example_sig):
1203         """ Return a string representation of a signature """
1204         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1205
1206     def _extract_signature_function(self, video_id, player_url, example_sig):
1207         id_m = re.match(
1208             r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
1209             player_url)
1210         if not id_m:
1211             raise ExtractorError('Cannot identify player %r' % player_url)
1212         player_type = id_m.group('ext')
1213         player_id = id_m.group('id')
1214
1215         # Read from filesystem cache
1216         func_id = '%s_%s_%s' % (
1217             player_type, player_id, self._signature_cache_id(example_sig))
1218         assert os.path.basename(func_id) == func_id
1219
1220         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1221         if cache_spec is not None:
1222             return lambda s: ''.join(s[i] for i in cache_spec)
1223
1224         download_note = (
1225             'Downloading player %s' % player_url
1226             if self._downloader.params.get('verbose') else
1227             'Downloading %s player %s' % (player_type, player_id)
1228         )
1229         if player_type == 'js':
1230             code = self._download_webpage(
1231                 player_url, video_id,
1232                 note=download_note,
1233                 errnote='Download of %s failed' % player_url)
1234             res = self._parse_sig_js(code)
1235         elif player_type == 'swf':
1236             urlh = self._request_webpage(
1237                 player_url, video_id,
1238                 note=download_note,
1239                 errnote='Download of %s failed' % player_url)
1240             code = urlh.read()
1241             res = self._parse_sig_swf(code)
1242         else:
1243             assert False, 'Invalid player type %r' % player_type
1244
1245         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1246         cache_res = res(test_string)
1247         cache_spec = [ord(c) for c in cache_res]
1248
1249         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1250         return res
1251
    def _print_sig_code(self, func, example_sig):
        """Print Python source code equivalent to the signature function.

        Runs *func* over a probe string whose characters encode their own
        indices, observes the output permutation, and compresses runs of
        consecutive indices into slice expressions so a human can inspect
        the deciphering algorithm (--youtube-print-sig-code).
        """
        def gen_sig_code(idxs):
            # Yield string pieces ('s[i]' or slice expressions) that,
            # joined with ' + ', reconstruct the observed permutation.
            def _genslice(start, end, step):
                # Render a contiguous index run as a Python slice literal.
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # NOTE(review): assumes len(idxs) >= 2 — with a single index the
            # loop never runs and the trailing 's[%d]' % i would raise
            # NameError. Signatures are long in practice; confirm if reused.
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: continue while the stride is unchanged,
                    # otherwise flush the finished slice.
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Two adjacent indices start a new run with stride +/-1.
                    step = i - prev
                    start = prev
                    continue
                else:
                    # Isolated index: emit it as a single-element access.
                    yield 's[%d]' % prev
            # Flush the final element or the run still in progress.
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Probe string: character k is chr(k), so the output characters'
        # ordinals reveal the index permutation applied by func.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        # The generated code is guarded by the dot-separated part lengths of
        # the example signature, since the algorithm varies with them.
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1290
1291     def _parse_sig_js(self, jscode):
1292         funcname = self._search_regex(
1293             (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1294              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1295              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?(?P<sig>[a-zA-Z0-9$]+)\(',
1296              r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1297              r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1298             jscode, 'Initial JS player signature function name', group='sig')
1299
1300         jsi = JSInterpreter(jscode)
1301         initial_function = jsi.extract_function(funcname)
1302         return lambda s: initial_function([s])
1303
1304     def _parse_sig_swf(self, file_contents):
1305         swfi = SWFInterpreter(file_contents)
1306         TARGET_CLASSNAME = 'SignatureDecipher'
1307         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1308         initial_function = swfi.extract_function(searched_class, 'decipher')
1309         return lambda s: initial_function([s])
1310
1311     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1312         """Turn the encrypted s field into a working signature"""
1313
1314         if player_url is None:
1315             raise ExtractorError('Cannot decrypt signature without player_url')
1316
1317         if player_url.startswith('//'):
1318             player_url = 'https:' + player_url
1319         elif not re.match(r'https?://', player_url):
1320             player_url = compat_urlparse.urljoin(
1321                 'https://www.youtube.com', player_url)
1322         try:
1323             player_id = (player_url, self._signature_cache_id(s))
1324             if player_id not in self._player_cache:
1325                 func = self._extract_signature_function(
1326                     video_id, player_url, s
1327                 )
1328                 self._player_cache[player_id] = func
1329             func = self._player_cache[player_id]
1330             if self._downloader.params.get('youtube_print_sig_code'):
1331                 self._print_sig_code(func, s)
1332             return func(s)
1333         except Exception as e:
1334             tb = traceback.format_exc()
1335             raise ExtractorError(
1336                 'Signature extraction failed: ' + tb, cause=e)
1337
1338     def _get_subtitles(self, video_id, webpage):
1339         try:
1340             subs_doc = self._download_xml(
1341                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1342                 video_id, note=False)
1343         except ExtractorError as err:
1344             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1345             return {}
1346
1347         sub_lang_list = {}
1348         for track in subs_doc.findall('track'):
1349             lang = track.attrib['lang_code']
1350             if lang in sub_lang_list:
1351                 continue
1352             sub_formats = []
1353             for ext in self._SUBTITLE_FORMATS:
1354                 params = compat_urllib_parse_urlencode({
1355                     'lang': lang,
1356                     'v': video_id,
1357                     'fmt': ext,
1358                     'name': track.attrib['name'].encode('utf-8'),
1359                 })
1360                 sub_formats.append({
1361                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1362                     'ext': ext,
1363                 })
1364             sub_lang_list[lang] = sub_formats
1365         if not sub_lang_list:
1366             self._downloader.report_warning('video doesn\'t have subtitles')
1367             return {}
1368         return sub_lang_list
1369
1370     def _get_ytplayer_config(self, video_id, webpage):
1371         patterns = (
1372             # User data may contain arbitrary character sequences that may affect
1373             # JSON extraction with regex, e.g. when '};' is contained the second
1374             # regex won't capture the whole JSON. Yet working around by trying more
1375             # concrete regex first keeping in mind proper quoted string handling
1376             # to be implemented in future that will replace this workaround (see
1377             # https://github.com/ytdl-org/youtube-dl/issues/7468,
1378             # https://github.com/ytdl-org/youtube-dl/pull/7599)
1379             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1380             r';ytplayer\.config\s*=\s*({.+?});',
1381         )
1382         config = self._search_regex(
1383             patterns, webpage, 'ytplayer.config', default=None)
1384         if config:
1385             return self._parse_json(
1386                 uppercase_escape(config), video_id, fatal=False)
1387
    def _get_automatic_captions(self, video_id, webpage):
        """Return automatic captions as lang code -> list of format dicts.

        We need the webpage for getting the captions url, pass it as an
        argument to speed up the process.

        Tries three extraction strategies in order: the legacy 'ttsurl'
        timedtext listing, the player_response captions renderer (format in
        use as of 22.06.2017), and the deprecated caption_tracks /
        caption_translation_languages args. Returns {} (with a warning) on
        any failure.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config['args']
            caption_url = args.get('ttsurl')
            # Strategy 1: legacy timedtext service via 'ttsurl'.
            if caption_url:
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                # Build one URL per (target language, subtitle format),
                # translating from the original track's language.
                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            # Shared helper: derive per-language caption URLs from a base
            # caption URL by rewriting its 'tlang' and 'fmt' query params.
            def make_captions(sub_url, sub_langs):
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # Strategy 2: new captions format as of 22.06.2017, carried in
            # the JSON player_response.
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                if player_response:
                    # KeyError/IndexError here falls through to the broad
                    # handler below and yields the generic warning.
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                    base_url = renderer['captionTracks'][0]['baseUrl']
                    sub_lang_list = []
                    for lang in renderer['translationLanguages']:
                        lang_code = lang.get('languageCode')
                        if lang_code:
                            sub_lang_list.append(lang_code)
                    return make_captions(base_url, sub_lang_list)

            # Strategy 3 (last resort): some videos don't provide ttsurl but
            # rather caption_tracks and caption_translation_languages
            # (e.g. 20LmZk1hakA). Not used anymore as of 22.06.2017.
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1489
1490     def _mark_watched(self, video_id, video_info, player_response):
1491         playback_url = url_or_none(try_get(
1492             player_response,
1493             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1494             video_info, lambda x: x['videostats_playback_base_url'][0]))
1495         if not playback_url:
1496             return
1497         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1498         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1499
1500         # cpn generation algorithm is reverse engineered from base.js.
1501         # In fact it works even with dummy cpn.
1502         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1503         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1504
1505         qs.update({
1506             'ver': ['2'],
1507             'cpn': [cpn],
1508         })
1509         playback_url = compat_urlparse.urlunparse(
1510             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1511
1512         self._download_webpage(
1513             playback_url, video_id, 'Marking watched',
1514             'Unable to mark watched', fatal=False)
1515
1516     @staticmethod
1517     def _extract_urls(webpage):
1518         # Embedded YouTube player
1519         entries = [
1520             unescapeHTML(mobj.group('url'))
1521             for mobj in re.finditer(r'''(?x)
1522             (?:
1523                 <iframe[^>]+?src=|
1524                 data-video-url=|
1525                 <embed[^>]+?src=|
1526                 embedSWF\(?:\s*|
1527                 <object[^>]+data=|
1528                 new\s+SWFObject\(
1529             )
1530             (["\'])
1531                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1532                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1533             \1''', webpage)]
1534
1535         # lazyYT YouTube embed
1536         entries.extend(list(map(
1537             unescapeHTML,
1538             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1539
1540         # Wordpress "YouTube Video Importer" plugin
1541         matches = re.findall(r'''(?x)<div[^>]+
1542             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1543             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1544         entries.extend(m[-1] for m in matches)
1545
1546         return entries
1547
1548     @staticmethod
1549     def _extract_url(webpage):
1550         urls = YoutubeIE._extract_urls(webpage)
1551         return urls[0] if urls else None
1552
1553     @classmethod
1554     def extract_id(cls, url):
1555         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1556         if mobj is None:
1557             raise ExtractorError('Invalid URL: %s' % url)
1558         video_id = mobj.group(2)
1559         return video_id
1560
1561     def _extract_annotations(self, video_id):
1562         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1563         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1564
1565     @staticmethod
1566     def _extract_chapters(description, duration):
1567         if not description:
1568             return None
1569         chapter_lines = re.findall(
1570             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1571             description)
1572         if not chapter_lines:
1573             return None
1574         chapters = []
1575         for next_num, (chapter_line, time_point) in enumerate(
1576                 chapter_lines, start=1):
1577             start_time = parse_duration(time_point)
1578             if start_time is None:
1579                 continue
1580             if start_time > duration:
1581                 break
1582             end_time = (duration if next_num == len(chapter_lines)
1583                         else parse_duration(chapter_lines[next_num][1]))
1584             if end_time is None:
1585                 continue
1586             if end_time > duration:
1587                 end_time = duration
1588             if start_time > end_time:
1589                 break
1590             chapter_title = re.sub(
1591                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1592             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1593             chapters.append({
1594                 'start_time': start_time,
1595                 'end_time': end_time,
1596                 'title': chapter_title,
1597             })
1598         return chapters
1599
1600     def _real_extract(self, url):
1601         url, smuggled_data = unsmuggle_url(url, {})
1602
1603         proto = (
1604             'http' if self._downloader.params.get('prefer_insecure', False)
1605             else 'https')
1606
1607         start_time = None
1608         end_time = None
1609         parsed_url = compat_urllib_parse_urlparse(url)
1610         for component in [parsed_url.fragment, parsed_url.query]:
1611             query = compat_parse_qs(component)
1612             if start_time is None and 't' in query:
1613                 start_time = parse_duration(query['t'][0])
1614             if start_time is None and 'start' in query:
1615                 start_time = parse_duration(query['start'][0])
1616             if end_time is None and 'end' in query:
1617                 end_time = parse_duration(query['end'][0])
1618
1619         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1620         mobj = re.search(self._NEXT_URL_RE, url)
1621         if mobj:
1622             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1623         video_id = self.extract_id(url)
1624
1625         # Get video webpage
1626         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1627         video_webpage = self._download_webpage(url, video_id)
1628
1629         # Attempt to extract SWF player URL
1630         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1631         if mobj is not None:
1632             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1633         else:
1634             player_url = None
1635
1636         dash_mpds = []
1637
1638         def add_dash_mpd(video_info):
1639             dash_mpd = video_info.get('dashmpd')
1640             if dash_mpd and dash_mpd[0] not in dash_mpds:
1641                 dash_mpds.append(dash_mpd[0])
1642
1643         def add_dash_mpd_pr(pl_response):
1644             dash_mpd = url_or_none(try_get(
1645                 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1646                 compat_str))
1647             if dash_mpd and dash_mpd not in dash_mpds:
1648                 dash_mpds.append(dash_mpd)
1649
1650         is_live = None
1651         view_count = None
1652
1653         def extract_view_count(v_info):
1654             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1655
1656         def extract_token(v_info):
1657             return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
1658
1659         player_response = {}
1660
1661         # Get video info
1662         embed_webpage = None
1663         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1664             age_gate = True
1665             # We simulate the access to the video from www.youtube.com/v/{video_id}
1666             # this can be viewed without login into Youtube
1667             url = proto + '://www.youtube.com/embed/%s' % video_id
1668             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1669             data = compat_urllib_parse_urlencode({
1670                 'video_id': video_id,
1671                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1672                 'sts': self._search_regex(
1673                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1674             })
1675             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1676             video_info_webpage = self._download_webpage(
1677                 video_info_url, video_id,
1678                 note='Refetching age-gated info webpage',
1679                 errnote='unable to download video info webpage')
1680             video_info = compat_parse_qs(video_info_webpage)
1681             add_dash_mpd(video_info)
1682         else:
1683             age_gate = False
1684             video_info = None
1685             sts = None
1686             # Try looking directly into the video webpage
1687             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1688             if ytplayer_config:
1689                 args = ytplayer_config['args']
1690                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1691                     # Convert to the same format returned by compat_parse_qs
1692                     video_info = dict((k, [v]) for k, v in args.items())
1693                     add_dash_mpd(video_info)
1694                 # Rental video is not rented but preview is available (e.g.
1695                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1696                 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1697                 if not video_info and args.get('ypc_vid'):
1698                     return self.url_result(
1699                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1700                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1701                     is_live = True
1702                 sts = ytplayer_config.get('sts')
1703                 if not player_response:
1704                     pl_response = str_or_none(args.get('player_response'))
1705                     if pl_response:
1706                         pl_response = self._parse_json(pl_response, video_id, fatal=False)
1707                         if isinstance(pl_response, dict):
1708                             player_response = pl_response
1709             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1710                 add_dash_mpd_pr(player_response)
1711                 # We also try looking in get_video_info since it may contain different dashmpd
1712                 # URL that points to a DASH manifest with possibly different itag set (some itags
1713                 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1714                 # manifest pointed by get_video_info's dashmpd).
1715                 # The general idea is to take a union of itags of both DASH manifests (for example
1716                 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
1717                 self.report_video_info_webpage_download(video_id)
1718                 for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
1719                     query = {
1720                         'video_id': video_id,
1721                         'ps': 'default',
1722                         'eurl': '',
1723                         'gl': 'US',
1724                         'hl': 'en',
1725                     }
1726                     if el:
1727                         query['el'] = el
1728                     if sts:
1729                         query['sts'] = sts
1730                     video_info_webpage = self._download_webpage(
1731                         '%s://www.youtube.com/get_video_info' % proto,
1732                         video_id, note=False,
1733                         errnote='unable to download video info webpage',
1734                         fatal=False, query=query)
1735                     if not video_info_webpage:
1736                         continue
1737                     get_video_info = compat_parse_qs(video_info_webpage)
1738                     if not player_response:
1739                         pl_response = get_video_info.get('player_response', [None])[0]
1740                         if isinstance(pl_response, dict):
1741                             player_response = pl_response
1742                             add_dash_mpd_pr(player_response)
1743                     add_dash_mpd(get_video_info)
1744                     if view_count is None:
1745                         view_count = extract_view_count(get_video_info)
1746                     if not video_info:
1747                         video_info = get_video_info
1748                     get_token = extract_token(get_video_info)
1749                     if get_token:
1750                         # Different get_video_info requests may report different results, e.g.
1751                         # some may report video unavailability, but some may serve it without
1752                         # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
1753                         # the original webpage as well as el=info and el=embedded get_video_info
1754                         # requests report video unavailability due to geo restriction while
1755                         # el=detailpage succeeds and returns valid data). This is probably
1756                         # due to YouTube measures against IP ranges of hosting providers.
1757                         # Working around by preferring the first succeeded video_info containing
1758                         # the token if no such video_info yet was found.
1759                         token = extract_token(video_info)
1760                         if not token:
1761                             video_info = get_video_info
1762                         break
1763
1764         def extract_unavailable_message():
1765             return self._html_search_regex(
1766                 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1767                 video_webpage, 'unavailable message', default=None)
1768
1769         if not video_info:
1770             unavailable_message = extract_unavailable_message()
1771             if not unavailable_message:
1772                 unavailable_message = 'Unable to extract video data'
1773             raise ExtractorError(
1774                 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1775
1776         if video_info.get('license_info'):
1777             raise ExtractorError('This video is DRM protected.', expected=True)
1778
1779         video_details = try_get(
1780             player_response, lambda x: x['videoDetails'], dict) or {}
1781
1782         # title
1783         if 'title' in video_info:
1784             video_title = video_info['title'][0]
1785         elif 'title' in player_response:
1786             video_title = video_details['title']
1787         else:
1788             self._downloader.report_warning('Unable to extract video title')
1789             video_title = '_'
1790
1791         # description
1792         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1793         if video_description:
1794
1795             def replace_url(m):
1796                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1797                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1798                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1799                     qs = compat_parse_qs(parsed_redir_url.query)
1800                     q = qs.get('q')
1801                     if q and q[0]:
1802                         return q[0]
1803                 return redir_url
1804
1805             description_original = video_description = re.sub(r'''(?x)
1806                 <a\s+
1807                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1808                     (?:title|href)="([^"]+)"\s+
1809                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1810                     class="[^"]*"[^>]*>
1811                 [^<]+\.{3}\s*
1812                 </a>
1813             ''', replace_url, video_description)
1814             video_description = clean_html(video_description)
1815         else:
1816             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1817             if fd_mobj:
1818                 video_description = unescapeHTML(fd_mobj.group(1))
1819             else:
1820                 video_description = ''
1821
1822         if not smuggled_data.get('force_singlefeed', False):
1823             if not self._downloader.params.get('noplaylist'):
1824                 multifeed_metadata_list = try_get(
1825                     player_response,
1826                     lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1827                     compat_str) or try_get(
1828                     video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1829                 if multifeed_metadata_list:
1830                     entries = []
1831                     feed_ids = []
1832                     for feed in multifeed_metadata_list.split(','):
1833                         # Unquote should take place before split on comma (,) since textual
1834                         # fields may contain comma as well (see
1835                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
1836                         feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1837                         entries.append({
1838                             '_type': 'url_transparent',
1839                             'ie_key': 'Youtube',
1840                             'url': smuggle_url(
1841                                 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1842                                 {'force_singlefeed': True}),
1843                             'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1844                         })
1845                         feed_ids.append(feed_data['id'][0])
1846                     self.to_screen(
1847                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1848                         % (', '.join(feed_ids), video_id))
1849                     return self.playlist_result(entries, video_id, video_title, video_description)
1850             else:
1851                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1852
1853         if view_count is None:
1854             view_count = extract_view_count(video_info)
1855         if view_count is None and video_details:
1856             view_count = int_or_none(video_details.get('viewCount'))
1857
1858         # Check for "rental" videos
1859         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1860             raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1861
1862         def _extract_filesize(media_url):
1863             return int_or_none(self._search_regex(
1864                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1865
1866         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1867             self.report_rtmp_download()
1868             formats = [{
1869                 'format_id': '_rtmp',
1870                 'protocol': 'rtmp',
1871                 'url': video_info['conn'][0],
1872                 'player_url': player_url,
1873             }]
1874         elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1875             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1876             if 'rtmpe%3Dyes' in encoded_url_map:
1877                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1878             formats_spec = {}
1879             fmt_list = video_info.get('fmt_list', [''])[0]
1880             if fmt_list:
1881                 for fmt in fmt_list.split(','):
1882                     spec = fmt.split('/')
1883                     if len(spec) > 1:
1884                         width_height = spec[1].split('x')
1885                         if len(width_height) == 2:
1886                             formats_spec[spec[0]] = {
1887                                 'resolution': spec[1],
1888                                 'width': int_or_none(width_height[0]),
1889                                 'height': int_or_none(width_height[1]),
1890                             }
1891             q = qualities(['small', 'medium', 'hd720'])
1892             streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
1893             if streaming_formats:
1894                 for fmt in streaming_formats:
1895                     itag = str_or_none(fmt.get('itag'))
1896                     if not itag:
1897                         continue
1898                     quality = fmt.get('quality')
1899                     quality_label = fmt.get('qualityLabel') or quality
1900                     formats_spec[itag] = {
1901                         'asr': int_or_none(fmt.get('audioSampleRate')),
1902                         'filesize': int_or_none(fmt.get('contentLength')),
1903                         'format_note': quality_label,
1904                         'fps': int_or_none(fmt.get('fps')),
1905                         'height': int_or_none(fmt.get('height')),
1906                         'quality': q(quality),
1907                         # bitrate for itag 43 is always 2147483647
1908                         'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1909                         'width': int_or_none(fmt.get('width')),
1910                     }
1911             formats = []
1912             for url_data_str in encoded_url_map.split(','):
1913                 url_data = compat_parse_qs(url_data_str)
1914                 if 'itag' not in url_data or 'url' not in url_data:
1915                     continue
1916                 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1917                 # Unsupported FORMAT_STREAM_TYPE_OTF
1918                 if stream_type == 3:
1919                     continue
1920                 format_id = url_data['itag'][0]
1921                 url = url_data['url'][0]
1922
1923                 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1924                     ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1925                     jsplayer_url_json = self._search_regex(
1926                         ASSETS_RE,
1927                         embed_webpage if age_gate else video_webpage,
1928                         'JS player URL (1)', default=None)
1929                     if not jsplayer_url_json and not age_gate:
1930                         # We need the embed website after all
1931                         if embed_webpage is None:
1932                             embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1933                             embed_webpage = self._download_webpage(
1934                                 embed_url, video_id, 'Downloading embed webpage')
1935                         jsplayer_url_json = self._search_regex(
1936                             ASSETS_RE, embed_webpage, 'JS player URL')
1937
1938                     player_url = json.loads(jsplayer_url_json)
1939                     if player_url is None:
1940                         player_url_json = self._search_regex(
1941                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1942                             video_webpage, 'age gate player URL')
1943                         player_url = json.loads(player_url_json)
1944
1945                 if 'sig' in url_data:
1946                     url += '&signature=' + url_data['sig'][0]
1947                 elif 's' in url_data:
1948                     encrypted_sig = url_data['s'][0]
1949
1950                     if self._downloader.params.get('verbose'):
1951                         if player_url is None:
1952                             player_version = 'unknown'
1953                             player_desc = 'unknown'
1954                         else:
1955                             if player_url.endswith('swf'):
1956                                 player_version = self._search_regex(
1957                                     r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1958                                     'flash player', fatal=False)
1959                                 player_desc = 'flash player %s' % player_version
1960                             else:
1961                                 player_version = self._search_regex(
1962                                     [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
1963                                      r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
1964                                     player_url,
1965                                     'html5 player', fatal=False)
1966                                 player_desc = 'html5 player %s' % player_version
1967
1968                         parts_sizes = self._signature_cache_id(encrypted_sig)
1969                         self.to_screen('{%s} signature length %s, %s' %
1970                                        (format_id, parts_sizes, player_desc))
1971
1972                     signature = self._decrypt_signature(
1973                         encrypted_sig, video_id, player_url, age_gate)
1974                     url += '&signature=' + signature
1975                 if 'ratebypass' not in url:
1976                     url += '&ratebypass=yes'
1977
1978                 dct = {
1979                     'format_id': format_id,
1980                     'url': url,
1981                     'player_url': player_url,
1982                 }
1983                 if format_id in self._formats:
1984                     dct.update(self._formats[format_id])
1985                 if format_id in formats_spec:
1986                     dct.update(formats_spec[format_id])
1987
1988                 # Some itags are not included in DASH manifest thus corresponding formats will
1989                 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
1990                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1991                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1992                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
1993
1994                 filesize = int_or_none(url_data.get(
1995                     'clen', [None])[0]) or _extract_filesize(url)
1996
1997                 quality = url_data.get('quality', [None])[0]
1998
1999                 more_fields = {
2000                     'filesize': filesize,
2001                     'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
2002                     'width': width,
2003                     'height': height,
2004                     'fps': int_or_none(url_data.get('fps', [None])[0]),
2005                     'format_note': url_data.get('quality_label', [None])[0] or quality,
2006                     'quality': q(quality),
2007                 }
2008                 for key, value in more_fields.items():
2009                     if value:
2010                         dct[key] = value
2011                 type_ = url_data.get('type', [None])[0]
2012                 if type_:
2013                     type_split = type_.split(';')
2014                     kind_ext = type_split[0].split('/')
2015                     if len(kind_ext) == 2:
2016                         kind, _ = kind_ext
2017                         dct['ext'] = mimetype2ext(type_split[0])
2018                         if kind in ('audio', 'video'):
2019                             codecs = None
2020                             for mobj in re.finditer(
2021                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2022                                 if mobj.group('key') == 'codecs':
2023                                     codecs = mobj.group('val')
2024                                     break
2025                             if codecs:
2026                                 dct.update(parse_codecs(codecs))
2027                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2028                     dct['downloader_options'] = {
2029                         # Youtube throttles chunks >~10M
2030                         'http_chunk_size': 10485760,
2031                     }
2032                 formats.append(dct)
2033         else:
2034             manifest_url = (
2035                 url_or_none(try_get(
2036                     player_response,
2037                     lambda x: x['streamingData']['hlsManifestUrl'],
2038                     compat_str)) or
2039                 url_or_none(try_get(
2040                     video_info, lambda x: x['hlsvp'][0], compat_str)))
2041             if manifest_url:
2042                 formats = []
2043                 m3u8_formats = self._extract_m3u8_formats(
2044                     manifest_url, video_id, 'mp4', fatal=False)
2045                 for a_format in m3u8_formats:
2046                     itag = self._search_regex(
2047                         r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2048                     if itag:
2049                         a_format['format_id'] = itag
2050                         if itag in self._formats:
2051                             dct = self._formats[itag].copy()
2052                             dct.update(a_format)
2053                             a_format = dct
2054                     a_format['player_url'] = player_url
2055                     # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2056                     a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2057                     formats.append(a_format)
2058             else:
2059                 error_message = clean_html(video_info.get('reason', [None])[0])
2060                 if not error_message:
2061                     error_message = extract_unavailable_message()
2062                 if error_message:
2063                     raise ExtractorError(error_message, expected=True)
2064                 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2065
2066         # uploader
2067         video_uploader = try_get(
2068             video_info, lambda x: x['author'][0],
2069             compat_str) or str_or_none(video_details.get('author'))
2070         if video_uploader:
2071             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2072         else:
2073             self._downloader.report_warning('unable to extract uploader name')
2074
2075         # uploader_id
2076         video_uploader_id = None
2077         video_uploader_url = None
2078         mobj = re.search(
2079             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2080             video_webpage)
2081         if mobj is not None:
2082             video_uploader_id = mobj.group('uploader_id')
2083             video_uploader_url = mobj.group('uploader_url')
2084         else:
2085             self._downloader.report_warning('unable to extract uploader nickname')
2086
2087         channel_id = self._html_search_meta(
2088             'channelId', video_webpage, 'channel id')
2089         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2090
2091         # thumbnail image
2092         # We try first to get a high quality image:
2093         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2094                             video_webpage, re.DOTALL)
2095         if m_thumb is not None:
2096             video_thumbnail = m_thumb.group(1)
2097         elif 'thumbnail_url' not in video_info:
2098             self._downloader.report_warning('unable to extract video thumbnail')
2099             video_thumbnail = None
2100         else:   # don't panic if we can't find it
2101             video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2102
2103         # upload date
2104         upload_date = self._html_search_meta(
2105             'datePublished', video_webpage, 'upload date', default=None)
2106         if not upload_date:
2107             upload_date = self._search_regex(
2108                 [r'(?s)id="eow-date.*?>(.*?)</span>',
2109                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2110                 video_webpage, 'upload date', default=None)
2111         upload_date = unified_strdate(upload_date)
2112
2113         video_license = self._html_search_regex(
2114             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2115             video_webpage, 'license', default=None)
2116
2117         m_music = re.search(
2118             r'''(?x)
2119                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2120                 <ul[^>]*>\s*
2121                 <li>(?P<title>.+?)
2122                 by (?P<creator>.+?)
2123                 (?:
2124                     \(.+?\)|
2125                     <a[^>]*
2126                         (?:
2127                             \bhref=["\']/red[^>]*>|             # drop possible
2128                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
2129                         )
2130                     .*?
2131                 )?</li
2132             ''',
2133             video_webpage)
2134         if m_music:
2135             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2136             video_creator = clean_html(m_music.group('creator'))
2137         else:
2138             video_alt_title = video_creator = None
2139
2140         def extract_meta(field):
2141             return self._html_search_regex(
2142                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2143                 video_webpage, field, default=None)
2144
2145         track = extract_meta('Song')
2146         artist = extract_meta('Artist')
2147         album = extract_meta('Album')
2148
2149         # Youtube Music Auto-generated description
2150         release_date = release_year = None
2151         if video_description:
2152             mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2153             if mobj:
2154                 if not track:
2155                     track = mobj.group('track').strip()
2156                 if not artist:
2157                     artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2158                 if not album:
2159                     album = mobj.group('album'.strip())
2160                 release_year = mobj.group('release_year')
2161                 release_date = mobj.group('release_date')
2162                 if release_date:
2163                     release_date = release_date.replace('-', '')
2164                     if not release_year:
2165                         release_year = int(release_date[:4])
2166                 if release_year:
2167                     release_year = int(release_year)
2168
2169         m_episode = re.search(
2170             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2171             video_webpage)
2172         if m_episode:
2173             series = unescapeHTML(m_episode.group('series'))
2174             season_number = int(m_episode.group('season'))
2175             episode_number = int(m_episode.group('episode'))
2176         else:
2177             series = season_number = episode_number = None
2178
2179         m_cat_container = self._search_regex(
2180             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2181             video_webpage, 'categories', default=None)
2182         if m_cat_container:
2183             category = self._html_search_regex(
2184                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2185                 default=None)
2186             video_categories = None if category is None else [category]
2187         else:
2188             video_categories = None
2189
2190         video_tags = [
2191             unescapeHTML(m.group('content'))
2192             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2193
2194         def _extract_count(count_name):
2195             return str_to_int(self._search_regex(
2196                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2197                 % re.escape(count_name),
2198                 video_webpage, count_name, default=None))
2199
2200         like_count = _extract_count('like')
2201         dislike_count = _extract_count('dislike')
2202
2203         if view_count is None:
2204             view_count = str_to_int(self._search_regex(
2205                 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2206                 'view count', default=None))
2207
2208         # subtitles
2209         video_subtitles = self.extract_subtitles(video_id, video_webpage)
2210         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2211
2212         video_duration = try_get(
2213             video_info, lambda x: int_or_none(x['length_seconds'][0]))
2214         if not video_duration:
2215             video_duration = int_or_none(video_details.get('lengthSeconds'))
2216         if not video_duration:
2217             video_duration = parse_duration(self._html_search_meta(
2218                 'duration', video_webpage, 'video duration'))
2219
2220         # annotations
2221         video_annotations = None
2222         if self._downloader.params.get('writeannotations', False):
2223             video_annotations = self._extract_annotations(video_id)
2224
2225         chapters = self._extract_chapters(description_original, video_duration)
2226
2227         # Look for the DASH manifest
2228         if self._downloader.params.get('youtube_include_dash_manifest', True):
2229             dash_mpd_fatal = True
2230             for mpd_url in dash_mpds:
2231                 dash_formats = {}
2232                 try:
2233                     def decrypt_sig(mobj):
2234                         s = mobj.group(1)
2235                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2236                         return '/signature/%s' % dec_s
2237
2238                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2239
2240                     for df in self._extract_mpd_formats(
2241                             mpd_url, video_id, fatal=dash_mpd_fatal,
2242                             formats_dict=self._formats):
2243                         if not df.get('filesize'):
2244                             df['filesize'] = _extract_filesize(df['url'])
2245                         # Do not overwrite DASH format found in some previous DASH manifest
2246                         if df['format_id'] not in dash_formats:
2247                             dash_formats[df['format_id']] = df
2248                         # Additional DASH manifests may end up in HTTP Error 403 therefore
2249                         # allow them to fail without bug report message if we already have
2250                         # some DASH manifest succeeded. This is temporary workaround to reduce
2251                         # burst of bug reports until we figure out the reason and whether it
2252                         # can be fixed at all.
2253                         dash_mpd_fatal = False
2254                 except (ExtractorError, KeyError) as e:
2255                     self.report_warning(
2256                         'Skipping DASH manifest: %r' % e, video_id)
2257                 if dash_formats:
2258                     # Remove the formats we found through non-DASH, they
2259                     # contain less info and it can be wrong, because we use
2260                     # fixed values (for example the resolution). See
2261                     # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2262                     # example.
2263                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2264                     formats.extend(dash_formats.values())
2265
2266         # Check for malformed aspect ratio
2267         stretched_m = re.search(
2268             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2269             video_webpage)
2270         if stretched_m:
2271             w = float(stretched_m.group('w'))
2272             h = float(stretched_m.group('h'))
2273             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2274             # We will only process correct ratios.
2275             if w > 0 and h > 0:
2276                 ratio = w / h
2277                 for f in formats:
2278                     if f.get('vcodec') != 'none':
2279                         f['stretched_ratio'] = ratio
2280
2281         if not formats:
2282             token = extract_token(video_info)
2283             if not token:
2284                 if 'reason' in video_info:
2285                     if 'The uploader has not made this video available in your country.' in video_info['reason']:
2286                         regions_allowed = self._html_search_meta(
2287                             'regionsAllowed', video_webpage, default=None)
2288                         countries = regions_allowed.split(',') if regions_allowed else None
2289                         self.raise_geo_restricted(
2290                             msg=video_info['reason'][0], countries=countries)
2291                     reason = video_info['reason'][0]
2292                     if 'Invalid parameters' in reason:
2293                         unavailable_message = extract_unavailable_message()
2294                         if unavailable_message:
2295                             reason = unavailable_message
2296                     raise ExtractorError(
2297                         'YouTube said: %s' % reason,
2298                         expected=True, video_id=video_id)
2299                 else:
2300                     raise ExtractorError(
2301                         '"token" parameter not in video info for unknown reason',
2302                         video_id=video_id)
2303
2304         self._sort_formats(formats)
2305
2306         self.mark_watched(video_id, video_info, player_response)
2307
2308         return {
2309             'id': video_id,
2310             'uploader': video_uploader,
2311             'uploader_id': video_uploader_id,
2312             'uploader_url': video_uploader_url,
2313             'channel_id': channel_id,
2314             'channel_url': channel_url,
2315             'upload_date': upload_date,
2316             'license': video_license,
2317             'creator': video_creator or artist,
2318             'title': video_title,
2319             'alt_title': video_alt_title or track,
2320             'thumbnail': video_thumbnail,
2321             'description': video_description,
2322             'categories': video_categories,
2323             'tags': video_tags,
2324             'subtitles': video_subtitles,
2325             'automatic_captions': automatic_captions,
2326             'duration': video_duration,
2327             'age_limit': 18 if age_gate else 0,
2328             'annotations': video_annotations,
2329             'chapters': chapters,
2330             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2331             'view_count': view_count,
2332             'like_count': like_count,
2333             'dislike_count': dislike_count,
2334             'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
2335             'formats': formats,
2336             'is_live': is_live,
2337             'start_time': start_time,
2338             'end_time': end_time,
2339             'series': series,
2340             'season_number': season_number,
2341             'episode_number': episode_number,
2342             'track': track,
2343             'artist': artist,
2344             'album': album,
2345             'release_date': release_date,
2346             'release_year': release_year,
2347         }
2348
2349
2350 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2351     IE_DESC = 'YouTube.com playlists'
2352     _VALID_URL = r"""(?x)(?:
2353                         (?:https?://)?
2354                         (?:\w+\.)?
2355                         (?:
2356                             (?:
2357                                 youtube\.com|
2358                                 invidio\.us
2359                             )
2360                             /
2361                             (?:
2362                                (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2363                                \? (?:.*?[&;])*? (?:p|a|list)=
2364                             |  p/
2365                             )|
2366                             youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2367                         )
2368                         (
2369                             (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2370                             # Top tracks, they can also include dots
2371                             |(?:MC)[\w\.]*
2372                         )
2373                         .*
2374                      |
2375                         (%(playlist_id)s)
2376                      )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2377     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2378     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
2379     IE_NAME = 'youtube:playlist'
2380     _TESTS = [{
2381         'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2382         'info_dict': {
2383             'title': 'ytdl test PL',
2384             'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2385         },
2386         'playlist_count': 3,
2387     }, {
2388         'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2389         'info_dict': {
2390             'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2391             'title': 'YDL_Empty_List',
2392         },
2393         'playlist_count': 0,
2394         'skip': 'This playlist is private',
2395     }, {
2396         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2397         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2398         'info_dict': {
2399             'title': '29C3: Not my department',
2400             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2401         },
2402         'playlist_count': 95,
2403     }, {
2404         'note': 'issue #673',
2405         'url': 'PLBB231211A4F62143',
2406         'info_dict': {
2407             'title': '[OLD]Team Fortress 2 (Class-based LP)',
2408             'id': 'PLBB231211A4F62143',
2409         },
2410         'playlist_mincount': 26,
2411     }, {
2412         'note': 'Large playlist',
2413         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2414         'info_dict': {
2415             'title': 'Uploads from Cauchemar',
2416             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2417         },
2418         'playlist_mincount': 799,
2419     }, {
2420         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2421         'info_dict': {
2422             'title': 'YDL_safe_search',
2423             'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2424         },
2425         'playlist_count': 2,
2426         'skip': 'This playlist is private',
2427     }, {
2428         'note': 'embedded',
2429         'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2430         'playlist_count': 4,
2431         'info_dict': {
2432             'title': 'JODA15',
2433             'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2434         }
2435     }, {
2436         'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2437         'playlist_mincount': 485,
2438         'info_dict': {
2439             'title': '2017 華語最新單曲 (2/24更新)',
2440             'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2441         }
2442     }, {
2443         'note': 'Embedded SWF player',
2444         'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2445         'playlist_count': 4,
2446         'info_dict': {
2447             'title': 'JODA7',
2448             'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2449         }
2450     }, {
2451         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2452         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2453         'info_dict': {
2454             'title': 'Uploads from Interstellar Movie',
2455             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2456         },
2457         'playlist_mincount': 21,
2458     }, {
2459         # Playlist URL that does not actually serve a playlist
2460         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2461         'info_dict': {
2462             'id': 'FqZTN594JQw',
2463             'ext': 'webm',
2464             'title': "Smiley's People 01 detective, Adventure Series, Action",
2465             'uploader': 'STREEM',
2466             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2467             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2468             'upload_date': '20150526',
2469             'license': 'Standard YouTube License',
2470             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2471             'categories': ['People & Blogs'],
2472             'tags': list,
2473             'view_count': int,
2474             'like_count': int,
2475             'dislike_count': int,
2476         },
2477         'params': {
2478             'skip_download': True,
2479         },
2480         'add_ie': [YoutubeIE.ie_key()],
2481     }, {
2482         'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2483         'info_dict': {
2484             'id': 'yeWKywCrFtk',
2485             'ext': 'mp4',
2486             'title': 'Small Scale Baler and Braiding Rugs',
2487             'uploader': 'Backus-Page House Museum',
2488             'uploader_id': 'backuspagemuseum',
2489             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2490             'upload_date': '20161008',
2491             'license': 'Standard YouTube License',
2492             'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2493             'categories': ['Nonprofits & Activism'],
2494             'tags': list,
2495             'like_count': int,
2496             'dislike_count': int,
2497         },
2498         'params': {
2499             'noplaylist': True,
2500             'skip_download': True,
2501         },
2502     }, {
2503         'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2504         'only_matching': True,
2505     }, {
2506         'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2507         'only_matching': True,
2508     }, {
2509         # music album playlist
2510         'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2511         'only_matching': True,
2512     }, {
2513         'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2514         'only_matching': True,
2515     }]
2516
    def _real_initialize(self):
        # Perform login (when credentials are configured) before extraction.
        self._login()
2519
2520     def _extract_mix(self, playlist_id):
2521         # The mixes are generated from a single video
2522         # the id of the playlist is just 'RD' + video_id
2523         ids = []
2524         last_id = playlist_id[-11:]
2525         for n in itertools.count(1):
2526             url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2527             webpage = self._download_webpage(
2528                 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2529             new_ids = orderedSet(re.findall(
2530                 r'''(?xs)data-video-username=".*?".*?
2531                            href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
2532                 webpage))
2533             # Fetch new pages until all the videos are repeated, it seems that
2534             # there are always 51 unique videos.
2535             new_ids = [_id for _id in new_ids if _id not in ids]
2536             if not new_ids:
2537                 break
2538             ids.extend(new_ids)
2539             last_id = ids[-1]
2540
2541         url_results = self._ids_to_results(ids)
2542
2543         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2544         title_span = (
2545             search_title('playlist-title') or
2546             search_title('title long-title') or
2547             search_title('title'))
2548         title = clean_html(title_span)
2549
2550         return self.playlist_result(url_results, playlist_id, title)
2551
    def _extract_playlist(self, playlist_id):
        """Download a playlist page and build the playlist result.

        Returns a tuple (has_videos, playlist) where has_videos is False
        when the page exists but yields no entries (e.g. watch URLs that
        carry a bogus list= parameter).
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
        for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
            if mobj:
                reason = mobj.group('reason')
                message = 'This playlist %s' % reason
                if 'private' in reason:
                    message += ', use --username or --netrc to access it'
                message += '.'
                raise ExtractorError(message, expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                # Language-chooser alert is informational only; ignore it.
                continue
            else:
                # Unknown alert: surface it but keep extracting.
                self.report_warning('Youtube gives an alert message: ' + match)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
            page, 'uploader', default=None)
        mobj = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
            page)
        if mobj:
            uploader_id = mobj.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
        else:
            uploader_id = uploader_url = None

        has_videos = True

        if not playlist_title:
            try:
                # Some playlist URLs don't actually serve a playlist (e.g.
                # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
                next(self._entries(page, playlist_id))
            except StopIteration:
                has_videos = False

        # _entries is a generator, so a fresh call is needed here even when
        # it was already probed above.
        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist.update({
            'uploader': uploader,
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
        })

        return has_videos, playlist
2613
2614     def _check_download_just_video(self, url, playlist_id):
2615         # Check if it's a video-specific URL
2616         query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2617         video_id = query_dict.get('v', [None])[0] or self._search_regex(
2618             r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2619             'video id', default=None)
2620         if video_id:
2621             if self._downloader.params.get('noplaylist'):
2622                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2623                 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2624             else:
2625                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2626                 return video_id, None
2627         return None, None
2628
2629     def _real_extract(self, url):
2630         # Extract playlist id
2631         mobj = re.match(self._VALID_URL, url)
2632         if mobj is None:
2633             raise ExtractorError('Invalid URL: %s' % url)
2634         playlist_id = mobj.group(1) or mobj.group(2)
2635
2636         video_id, video = self._check_download_just_video(url, playlist_id)
2637         if video:
2638             return video
2639
2640         if playlist_id.startswith(('RD', 'UL', 'PU')):
2641             # Mixes require a custom extraction process
2642             return self._extract_mix(playlist_id)
2643
2644         has_videos, playlist = self._extract_playlist(playlist_id)
2645         if has_videos or not video_id:
2646             return playlist
2647
2648         # Some playlist URLs don't actually serve a playlist (see
2649         # https://github.com/ytdl-org/youtube-dl/issues/10537).
2650         # Fallback to plain video extraction if there is a video id
2651         # along with playlist id.
2652         return self.url_result(video_id, 'Youtube', video_id=video_id)
2653
2654
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to the more specific playlists/live extractors when they
        # also match the URL.
        return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
                else super(YoutubeChannelIE, cls).suitable(url))

    def _build_template_url(self, url, channel_id):
        """Return the channel's /videos page URL."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        channel_id = self._match_id(url)

        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        if channel_page is False:
            channel_playlist_id = False
        else:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                # Fall back to the app deep-link meta tags to recover the
                # channel id.
                channel_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if channel_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        channel_url, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # A 'UC...' channel id maps to its 'UU...' uploads playlist.
            playlist_id = 'UU' + channel_playlist_id[2:]
            return self.url_result(
                compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = [
                self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title)
                for video_id, video_title in self.extract_videos_from_page(channel_page)]
            return self.playlist_result(entries, channel_id)

        try:
            # Probe for at least one entry; an empty result may carry an
            # alert message explaining why.
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2747
2748
class YoutubeUserIE(YoutubeChannelIE):
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with another youtube
        # extractor; this regex is too permissive and would match their URLs.
        other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_yt_ies):
            return False
        else:
            return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        # Preserve the 'user' vs 'c' path segment from the original URL;
        # plain ytuser: inputs default to 'user'.
        mobj = re.match(self._VALID_URL, url)
        return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
2799
2800
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a /live URL to the currently embedded live video when
        one is present; otherwise defer to the base channel/user URL."""
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        base_url = mobj.group('base_url')
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if webpage:
            page_type = self._og_search_property(
                'type', webpage, 'page type', default='')
            video_id = self._html_search_meta(
                'videoId', webpage, 'video id', default=None)
            # Only hand off to YoutubeIE when the page embeds a video and
            # the meta value looks like a valid 11-character video id.
            if page_type.startswith('video') and video_id and re.match(
                    r'^[0-9A-Za-z_-]{11}$', video_id):
                return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
2851
2852
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    IE_DESC = 'YouTube.com user/channel playlists'
    # All extraction logic is inherited from YoutubePlaylistsBaseInfoExtractor;
    # this class only supplies the URL pattern and test cases.
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
2881
2882
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    # Matches video ids (and, when present, titles) in search result pages;
    # the title group is optional.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
2885
2886
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string parameters merged into the results URL; subclasses
    # (e.g. ytsearchdate) override this to change the sort order.
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        videos = []
        limit = n

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop when a page yields nothing new or the limit is reached
            # (>=, not >, so that no extra result page is downloaded when
            # exactly `limit` videos have already been collected).
            if not new_videos or len(videos) >= limit:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # Trim any overshoot from the last downloaded page.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
2935
2936
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same as ytsearch, but requests date-sorted results via an extra
    # query argument.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
2942
2943
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract search results directly from a results page URL."""
        mobj = re.match(self._VALID_URL, url)
        query = compat_urllib_parse_unquote_plus(mobj.group('query'))
        webpage = self._download_webpage(url, query)
        return self.playlist_result(self._process_page(webpage), playlist_title=query)
2964
2965
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        # A show is extracted through its /playlists page, handled by the
        # playlists base extractor.
        playlist_id = self._match_id(url)
        return super(YoutubeShowIE, self)._real_extract(
            'https://www.youtube.com/show/%s/playlists' % playlist_id)
2983
2984
2985 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
2986     """
2987     Base class for feed extractors
2988     Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
2989     """
2990     _LOGIN_REQUIRED = True
2991
    @property
    def IE_NAME(self):
        # Extractor name is derived from the subclass-defined _FEED_NAME.
        return 'youtube:%s' % self._FEED_NAME
2995
    def _real_initialize(self):
        # Feeds are account-specific (_LOGIN_REQUIRED is True), so log in
        # before extraction.
        self._login()
2998
2999     def _entries(self, page):
3000         # The extraction process is the same as for playlists, but the regex
3001         # for the video ids doesn't contain an index
3002         ids = []
3003         more_widget_html = content_html = page
3004         for page_num in itertools.count(1):
3005             matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
3006
3007             # 'recommended' feed has infinite 'load more' and each new portion spins
3008             # the same videos in (sometimes) slightly different order, so we'll check
3009             # for unicity and break when portion has no new videos
3010             new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
3011             if not new_ids:
3012                 break
3013
3014             ids.extend(new_ids)
3015
3016             for entry in self._ids_to_results(new_ids):
3017                 yield entry
3018
3019             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
3020             if not mobj:
3021                 break
3022
3023             more = self._download_json(
3024                 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
3025                 'Downloading page #%s' % page_num,
3026                 transform_source=uppercase_escape)
3027             content_html = more['content_html']
3028             more_widget_html = more['load_more_widget_html']
3029
3030     def _real_extract(self, url):
<