[youtube] Update invidious instance list (#29281)
[ytdl] / youtube_dl / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import itertools
6 import json
7 import os.path
8 import random
9 import re
10 import traceback
11
12 from .common import InfoExtractor, SearchInfoExtractor
13 from ..compat import (
14     compat_chr,
15     compat_HTTPError,
16     compat_parse_qs,
17     compat_str,
18     compat_urllib_parse_unquote_plus,
19     compat_urllib_parse_urlencode,
20     compat_urllib_parse_urlparse,
21     compat_urlparse,
22 )
23 from ..jsinterp import JSInterpreter
24 from ..utils import (
25     ExtractorError,
26     clean_html,
27     dict_get,
28     float_or_none,
29     int_or_none,
30     mimetype2ext,
31     parse_codecs,
32     parse_duration,
33     qualities,
34     remove_start,
35     smuggle_url,
36     str_or_none,
37     str_to_int,
38     try_get,
39     unescapeHTML,
40     unified_strdate,
41     unsmuggle_url,
42     update_url_query,
43     url_or_none,
44     urlencode_postdata,
45     urljoin,
46 )
47
48
def parse_qs(url):
    """Parse the query component of ``url``.

    The URL is split with ``compat_urlparse.urlparse`` and its ``query``
    part is handed to ``compat_urlparse.parse_qs``, whose result is returned
    unchanged.
    """
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
51
52
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Google account endpoints. _login() uses _LOGIN_URL, _LOOKUP_URL,
    # _CHALLENGE_URL and _TFA_URL.
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # '{0}' is filled with the TL token taken from the challenge response.
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Regex fragment for playlist IDs: one of the known prefixes followed by
    # at least 10 ID characters, or the bare 'RDMM' ID.
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)'

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not None) so the documented contract holds.
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST the hidden login form fields plus the JSON-encoded
            # ``f_req`` payload; returns the parsed JSON, or False on failure
            # because the download is non-fatal.
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything before the first '[' so the remainder
                # parses as a JSON array.
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # The same sign-in page URL is embedded in both the lookup and the
        # challenge request payloads.
        signin_page_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 signin_page_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, signin_page_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (not None) so the documented contract holds.
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesised so the conditional only selects the detail text;
            # without the parentheses '%' binds tighter than 'if/else' and
            # the prefix is lost for every message but the known one.
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        ' (Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Strip a leading 'G-' prefix from the entered code, if any.
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same precedence fix as for the password failure above.
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        # The login counts as successful only if the CheckCookie page
        # mentions the account page URL.
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _initialize_consent(self):
        """Set an accepting CONSENT cookie for .youtube.com when needed.

        Nothing is done if a '__Secure-3PSID' cookie exists or the current
        CONSENT cookie value already contains 'YES'. Otherwise the numeric id
        from a 'PENDING+<digits>' CONSENT value is reused, falling back to a
        random three-digit number.
        """
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Prepare the consent cookie and, if a downloader is set, log in."""
        self._initialize_consent()
        if self._downloader is None:
            return
        # The login result is deliberately ignored: a failed login has
        # already emitted a warning and initialization continues regardless.
        self._login()

    # Base JSON body sent by _call_api(); the caller's query is merged on top.
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    # Patterns capturing the JSON object assigned to ytInitialData /
    # ytInitialPlayerResponse in a page, and the text expected right after it.
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _call_api(self, ep, query, video_id, fatal=True):
        """POST a JSON request to the youtubei/v1 endpoint ``ep``.

        ``query`` is merged over a copy of _DEFAULT_API_DATA (top-level keys
        in ``query`` replace the defaults) and sent as the request body.
        Returns whatever ``_download_json`` returns; ``fatal`` is forwarded
        to it.
        """
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'), fatal=fatal,
            headers={'content-type': 'application/json'},
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

    def _extract_yt_initial_data(self, video_id, webpage):
        """Find and parse the ytInitialData JSON object in ``webpage``.

        The pattern anchored by _YT_INITIAL_BOUNDARY_RE is tried first, then
        the bare _YT_INITIAL_DATA_RE.
        """
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_ytcfg(self, video_id, webpage):
        """Parse the first ``ytcfg.set({...})`` argument found in ``webpage``.

        Returns an empty dict when no such call is found or the non-fatal
        JSON parse yields a falsy result.
        """
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def _extract_video(self, renderer):
        """Build a 'url'-type result dict for YoutubeIE from ``renderer``.

        ``renderer`` must contain 'videoId' (a KeyError is raised otherwise);
        title, description, duration, view count and uploader are looked up
        with try_get and are None when absent.
        """
        video_id = renderer['videoId']
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Whitespace is removed first so the leading run of digits and commas
        # is matched as one number.
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
341
342
343 class YoutubeIE(YoutubeBaseInfoExtractor):
344     IE_DESC = 'YouTube.com'
345     _INVIDIOUS_SITES = (
346         # invidious-redirect websites
347         r'(?:www\.)?redirect\.invidious\.io',
348         r'(?:(?:www|dev)\.)?invidio\.us',
349         # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
350         r'(?:(?:www|no)\.)?invidiou\.sh',
351         r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
352         r'(?:www\.)?invidious\.kabi\.tk',
353         r'(?:www\.)?invidious\.13ad\.de',
354         r'(?:www\.)?invidious\.mastodon\.host',
355         r'(?:www\.)?invidious\.zapashcanon\.fr',
356         r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
357         r'(?:www\.)?invidious\.tinfoil-hat\.net',
358         r'(?:www\.)?invidious\.himiko\.cloud',
359         r'(?:www\.)?invidious\.reallyancient\.tech',
360         r'(?:www\.)?invidious\.tube',
361         r'(?:www\.)?invidiou\.site',
362         r'(?:www\.)?invidious\.site',
363         r'(?:www\.)?invidious\.xyz',
364         r'(?:www\.)?invidious\.nixnet\.xyz',
365         r'(?:www\.)?invidious\.048596\.xyz',
366         r'(?:www\.)?invidious\.drycat\.fr',
367         r'(?:www\.)?inv\.skyn3t\.in',
368         r'(?:www\.)?tube\.poal\.co',
369         r'(?:www\.)?tube\.connect\.cafe',
370         r'(?:www\.)?vid\.wxzm\.sx',
371         r'(?:www\.)?vid\.mint\.lgbt',
372         r'(?:www\.)?vid\.puffyan\.us',
373         r'(?:www\.)?yewtu\.be',
374         r'(?:www\.)?yt\.elukerio\.org',
375         r'(?:www\.)?yt\.lelux\.fi',
376         r'(?:www\.)?invidious\.ggc-project\.de',
377         r'(?:www\.)?yt\.maisputain\.ovh',
378         r'(?:www\.)?ytprivate\.com',
379         r'(?:www\.)?invidious\.13ad\.de',
380         r'(?:www\.)?invidious\.toot\.koeln',
381         r'(?:www\.)?invidious\.fdn\.fr',
382         r'(?:www\.)?watch\.nettohikari\.com',
383         r'(?:www\.)?invidious\.namazso\.eu',
384         r'(?:www\.)?invidious\.silkky\.cloud',
385         r'(?:www\.)?invidious\.exonip\.de',
386         r'(?:www\.)?invidious\.riverside\.rocks',
387         r'(?:www\.)?invidious\.blamefran\.net',
388         r'(?:www\.)?invidious\.moomoo\.de',
389         r'(?:www\.)?ytb\.trom\.tf',
390         r'(?:www\.)?yt\.cyberhost\.uk',
391         r'(?:www\.)?kgg2m7yk5aybusll\.onion',
392         r'(?:www\.)?qklhadlycap4cnod\.onion',
393         r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
394         r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
395         r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
396         r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
397         r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
398         r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
399         r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
400         r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
401         r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
402         r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
403     )
404     _VALID_URL = r"""(?x)^
405                      (
406                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
407                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
408                             (?:www\.)?deturl\.com/www\.youtube\.com|
409                             (?:www\.)?pwnyoutube\.com|
410                             (?:www\.)?hooktube\.com|
411                             (?:www\.)?yourepeat\.com|
412                             tube\.majestyc\.net|
413                             %(invidious)s|
414                             youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
415                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
416                          (?:                                                  # the various things that can precede the ID:
417                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
418                              |(?:                                             # or the v= param in all its forms
419                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
420                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
421                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
422                                  v=
423                              )
424                          ))
425                          |(?:
426                             youtu\.be|                                        # just youtu.be/xxxx
427                             vid\.plus|                                        # or vid.plus/xxxx
428                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
429                             %(invidious)s
430                          )/
431                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
432                          )
433                      )?                                                       # all until now is optional -> you can pass the naked ID
434                      (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
435                      (?(1).+)?                                                # if we found the ID, everything can follow
436                      $""" % {
437         'invidious': '|'.join(_INVIDIOUS_SITES),
438     }
439     _PLAYER_INFO_RE = (
440         r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
441         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
442         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
443     )
444     _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
445
446     _GEO_BYPASS = False
447
448     IE_NAME = 'youtube'
449     _TESTS = [
450         {
451             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
452             'info_dict': {
453                 'id': 'BaW_jenozKc',
454                 'ext': 'mp4',
455                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
456                 'uploader': 'Philipp Hagemeister',
457                 'uploader_id': 'phihag',
458                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
459                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
460                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
461                 'upload_date': '20121002',
462                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
463                 'categories': ['Science & Technology'],
464                 'tags': ['youtube-dl'],
465                 'duration': 10,
466                 'view_count': int,
467                 'like_count': int,
468                 'dislike_count': int,
469                 'start_time': 1,
470                 'end_time': 9,
471             }
472         },
473         {
474             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
475             'note': 'Embed-only video (#1746)',
476             'info_dict': {
477                 'id': 'yZIXLfi8CZQ',
478                 'ext': 'mp4',
479                 'upload_date': '20120608',
480                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
481                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
482                 'uploader': 'SET India',
483                 'uploader_id': 'setindia',
484                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
485                 'age_limit': 18,
486             },
487             'skip': 'Private video',
488         },
489         {
490             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
491             'note': 'Use the first video ID in the URL',
492             'info_dict': {
493                 'id': 'BaW_jenozKc',
494                 'ext': 'mp4',
495                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
496                 'uploader': 'Philipp Hagemeister',
497                 'uploader_id': 'phihag',
498                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
499                 'upload_date': '20121002',
500                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
501                 'categories': ['Science & Technology'],
502                 'tags': ['youtube-dl'],
503                 'duration': 10,
504                 'view_count': int,
505                 'like_count': int,
506                 'dislike_count': int,
507             },
508             'params': {
509                 'skip_download': True,
510             },
511         },
512         {
513             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
514             'note': '256k DASH audio (format 141) via DASH manifest',
515             'info_dict': {
516                 'id': 'a9LDPn-MO4I',
517                 'ext': 'm4a',
518                 'upload_date': '20121002',
519                 'uploader_id': '8KVIDEO',
520                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
521                 'description': '',
522                 'uploader': '8KVIDEO',
523                 'title': 'UHDTV TEST 8K VIDEO.mp4'
524             },
525             'params': {
526                 'youtube_include_dash_manifest': True,
527                 'format': '141',
528             },
529             'skip': 'format 141 not served anymore',
530         },
531         # DASH manifest with encrypted signature
532         {
533             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
534             'info_dict': {
535                 'id': 'IB3lcPjvWLA',
536                 'ext': 'm4a',
537                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
538                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
539                 'duration': 244,
540                 'uploader': 'AfrojackVEVO',
541                 'uploader_id': 'AfrojackVEVO',
542                 'upload_date': '20131011',
543                 'abr': 129.495,
544             },
545             'params': {
546                 'youtube_include_dash_manifest': True,
547                 'format': '141/bestaudio[ext=m4a]',
548             },
549         },
550         # Controversy video
551         {
552             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
553             'info_dict': {
554                 'id': 'T4XJQO3qol8',
555                 'ext': 'mp4',
556                 'duration': 219,
557                 'upload_date': '20100909',
558                 'uploader': 'Amazing Atheist',
559                 'uploader_id': 'TheAmazingAtheist',
560                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
561                 'title': 'Burning Everyone\'s Koran',
562                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
563             }
564         },
565         # Normal age-gate video (No vevo, embed allowed), available via embed page
566         {
567             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
568             'info_dict': {
569                 'id': 'HtVdAasjOgU',
570                 'ext': 'mp4',
571                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
572                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
573                 'duration': 142,
574                 'uploader': 'The Witcher',
575                 'uploader_id': 'WitcherGame',
576                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
577                 'upload_date': '20140605',
578                 'age_limit': 18,
579             },
580         },
581         {
582             # Age-gated video only available with authentication (unavailable
583             # via embed page workaround)
584             'url': 'XgnwCQzjau8',
585             'only_matching': True,
586         },
587         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
588         # YouTube Red ad is not captured for creator
589         {
590             'url': '__2ABJjxzNo',
591             'info_dict': {
592                 'id': '__2ABJjxzNo',
593                 'ext': 'mp4',
594                 'duration': 266,
595                 'upload_date': '20100430',
596                 'uploader_id': 'deadmau5',
597                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
598                 'creator': 'deadmau5',
599                 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
600                 'uploader': 'deadmau5',
601                 'title': 'Deadmau5 - Some Chords (HD)',
602                 'alt_title': 'Some Chords',
603             },
604             'expected_warnings': [
605                 'DASH manifest missing',
606             ]
607         },
608         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
609         {
610             'url': 'lqQg6PlCWgI',
611             'info_dict': {
612                 'id': 'lqQg6PlCWgI',
613                 'ext': 'mp4',
614                 'duration': 6085,
615                 'upload_date': '20150827',
616                 'uploader_id': 'olympic',
617                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
618                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
619                 'uploader': 'Olympic',
620                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
621             },
622             'params': {
623                 'skip_download': 'requires avconv',
624             }
625         },
626         # Non-square pixels
627         {
628             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
629             'info_dict': {
630                 'id': '_b-2C3KPAM0',
631                 'ext': 'mp4',
632                 'stretched_ratio': 16 / 9.,
633                 'duration': 85,
634                 'upload_date': '20110310',
635                 'uploader_id': 'AllenMeow',
636                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
637                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
638                 'uploader': '孫ᄋᄅ',
639                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
640             },
641         },
642         # url_encoded_fmt_stream_map is empty string
643         {
644             'url': 'qEJwOuvDf7I',
645             'info_dict': {
646                 'id': 'qEJwOuvDf7I',
647                 'ext': 'webm',
648                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
649                 'description': '',
650                 'upload_date': '20150404',
651                 'uploader_id': 'spbelect',
652                 'uploader': 'Наблюдатели Петербурга',
653             },
654             'params': {
655                 'skip_download': 'requires avconv',
656             },
657             'skip': 'This live event has ended.',
658         },
659         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
660         {
661             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
662             'info_dict': {
663                 'id': 'FIl7x6_3R5Y',
664                 'ext': 'webm',
665                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
666                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
667                 'duration': 220,
668                 'upload_date': '20150625',
669                 'uploader_id': 'dorappi2000',
670                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
671                 'uploader': 'dorappi2000',
672                 'formats': 'mincount:31',
673             },
674             'skip': 'not actual anymore',
675         },
676         # DASH manifest with segment_list
677         {
678             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
679             'md5': '8ce563a1d667b599d21064e982ab9e31',
680             'info_dict': {
681                 'id': 'CsmdDsKjzN8',
682                 'ext': 'mp4',
683                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
684                 'uploader': 'Airtek',
685                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
686                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
687                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
688             },
689             'params': {
690                 'youtube_include_dash_manifest': True,
691                 'format': '135',  # bestvideo
692             },
693             'skip': 'This live event has ended.',
694         },
695         {
696             # Multifeed videos (multiple cameras), URL is for Main Camera
697             'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
698             'info_dict': {
699                 'id': 'jvGDaLqkpTg',
700                 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
701                 'description': 'md5:e03b909557865076822aa169218d6a5d',
702             },
703             'playlist': [{
704                 'info_dict': {
705                     'id': 'jvGDaLqkpTg',
706                     'ext': 'mp4',
707                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
708                     'description': 'md5:e03b909557865076822aa169218d6a5d',
709                     'duration': 10643,
710                     'upload_date': '20161111',
711                     'uploader': 'Team PGP',
712                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
713                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
714                 },
715             }, {
716                 'info_dict': {
717                     'id': '3AKt1R1aDnw',
718                     'ext': 'mp4',
719                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
720                     'description': 'md5:e03b909557865076822aa169218d6a5d',
721                     'duration': 10991,
722                     'upload_date': '20161111',
723                     'uploader': 'Team PGP',
724                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
725                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
726                 },
727             }, {
728                 'info_dict': {
729                     'id': 'RtAMM00gpVc',
730                     'ext': 'mp4',
731                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
732                     'description': 'md5:e03b909557865076822aa169218d6a5d',
733                     'duration': 10995,
734                     'upload_date': '20161111',
735                     'uploader': 'Team PGP',
736                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
737                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
738                 },
739             }, {
740                 'info_dict': {
741                     'id': '6N2fdlP3C5U',
742                     'ext': 'mp4',
743                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
744                     'description': 'md5:e03b909557865076822aa169218d6a5d',
745                     'duration': 10990,
746                     'upload_date': '20161111',
747                     'uploader': 'Team PGP',
748                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
749                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
750                 },
751             }],
752             'params': {
753                 'skip_download': True,
754             },
755         },
756         {
757             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
758             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
759             'info_dict': {
760                 'id': 'gVfLd0zydlo',
761                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
762             },
763             'playlist_count': 2,
764             'skip': 'Not multifeed anymore',
765         },
766         {
767             'url': 'https://vid.plus/FlRa-iH7PGw',
768             'only_matching': True,
769         },
770         {
771             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
772             'only_matching': True,
773         },
774         {
775             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
776             # Also tests cut-off URL expansion in video description (see
777             # https://github.com/ytdl-org/youtube-dl/issues/1892,
778             # https://github.com/ytdl-org/youtube-dl/issues/8164)
779             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
780             'info_dict': {
781                 'id': 'lsguqyKfVQg',
782                 'ext': 'mp4',
783                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
784                 'alt_title': 'Dark Walk - Position Music',
785                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
786                 'duration': 133,
787                 'upload_date': '20151119',
788                 'uploader_id': 'IronSoulElf',
789                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
790                 'uploader': 'IronSoulElf',
791                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
792                 'track': 'Dark Walk - Position Music',
793                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
794                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
795             },
796             'params': {
797                 'skip_download': True,
798             },
799         },
800         {
801             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
802             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
803             'only_matching': True,
804         },
805         {
806             # Video with yt:stretch=17:0
807             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
808             'info_dict': {
809                 'id': 'Q39EVAstoRM',
810                 'ext': 'mp4',
811                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
812                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
813                 'upload_date': '20151107',
814                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
815                 'uploader': 'CH GAMER DROID',
816             },
817             'params': {
818                 'skip_download': True,
819             },
820             'skip': 'This video does not exist.',
821         },
822         {
823             # Video with incomplete 'yt:stretch=16:'
824             'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
825             'only_matching': True,
826         },
827         {
828             # Video licensed under Creative Commons
829             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
830             'info_dict': {
831                 'id': 'M4gD1WSo5mA',
832                 'ext': 'mp4',
833                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
834                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
835                 'duration': 721,
836                 'upload_date': '20150127',
837                 'uploader_id': 'BerkmanCenter',
838                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
839                 'uploader': 'The Berkman Klein Center for Internet & Society',
840                 'license': 'Creative Commons Attribution license (reuse allowed)',
841             },
842             'params': {
843                 'skip_download': True,
844             },
845         },
846         {
847             # Channel-like uploader_url
848             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
849             'info_dict': {
850                 'id': 'eQcmzGIKrzg',
851                 'ext': 'mp4',
852                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
853                 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
854                 'duration': 4060,
855                 'upload_date': '20151119',
856                 'uploader': 'Bernie Sanders',
857                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
858                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
859                 'license': 'Creative Commons Attribution license (reuse allowed)',
860             },
861             'params': {
862                 'skip_download': True,
863             },
864         },
865         {
866             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
867             'only_matching': True,
868         },
869         {
870             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
871             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
872             'only_matching': True,
873         },
874         {
875             # Rental video preview
876             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
877             'info_dict': {
878                 'id': 'uGpuVWrhIzE',
879                 'ext': 'mp4',
880                 'title': 'Piku - Trailer',
881                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
882                 'upload_date': '20150811',
883                 'uploader': 'FlixMatrix',
884                 'uploader_id': 'FlixMatrixKaravan',
885                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
886                 'license': 'Standard YouTube License',
887             },
888             'params': {
889                 'skip_download': True,
890             },
891             'skip': 'This video is not available.',
892         },
893         {
894             # YouTube Red video with episode data
895             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
896             'info_dict': {
897                 'id': 'iqKdEhx-dD4',
898                 'ext': 'mp4',
899                 'title': 'Isolation - Mind Field (Ep 1)',
900                 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
901                 'duration': 2085,
902                 'upload_date': '20170118',
903                 'uploader': 'Vsauce',
904                 'uploader_id': 'Vsauce',
905                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
906                 'series': 'Mind Field',
907                 'season_number': 1,
908                 'episode_number': 1,
909             },
910             'params': {
911                 'skip_download': True,
912             },
913             'expected_warnings': [
914                 'Skipping DASH manifest',
915             ],
916         },
917         {
918             # The following content has been identified by the YouTube community
919             # as inappropriate or offensive to some audiences.
920             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
921             'info_dict': {
922                 'id': '6SJNVb0GnPI',
923                 'ext': 'mp4',
924                 'title': 'Race Differences in Intelligence',
925                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
926                 'duration': 965,
927                 'upload_date': '20140124',
928                 'uploader': 'New Century Foundation',
929                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
930                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
931             },
932             'params': {
933                 'skip_download': True,
934             },
935             'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
936         },
937         {
938             # itag 212
939             'url': '1t24XAntNCY',
940             'only_matching': True,
941         },
942         {
943             # geo restricted to JP
944             'url': 'sJL6WA-aGkQ',
945             'only_matching': True,
946         },
947         {
948             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
949             'only_matching': True,
950         },
951         {
952             'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
953             'only_matching': True,
954         },
955         {
956             # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
957             'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
958             'only_matching': True,
959         },
960         {
961             # DRM protected
962             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
963             'only_matching': True,
964         },
965         {
966             # Video with unsupported adaptive stream type formats
967             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
968             'info_dict': {
969                 'id': 'Z4Vy8R84T1U',
970                 'ext': 'mp4',
971                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
972                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
973                 'duration': 433,
974                 'upload_date': '20130923',
975                 'uploader': 'Amelia Putri Harwita',
976                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
977                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
978                 'formats': 'maxcount:10',
979             },
980             'params': {
981                 'skip_download': True,
982                 'youtube_include_dash_manifest': False,
983             },
984             'skip': 'not actual anymore',
985         },
986         {
987             # Youtube Music Auto-generated description
988             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
989             'info_dict': {
990                 'id': 'MgNrAu2pzNs',
991                 'ext': 'mp4',
992                 'title': 'Voyeur Girl',
993                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
994                 'upload_date': '20190312',
995                 'uploader': 'Stephen - Topic',
996                 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
997                 'artist': 'Stephen',
998                 'track': 'Voyeur Girl',
999                 'album': 'it\'s too much love to know my dear',
1000                 'release_date': '20190313',
1001                 'release_year': 2019,
1002             },
1003             'params': {
1004                 'skip_download': True,
1005             },
1006         },
1007         {
1008             'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1009             'only_matching': True,
1010         },
1011         {
1012             # invalid -> valid video id redirection
1013             'url': 'DJztXj2GPfl',
1014             'info_dict': {
1015                 'id': 'DJztXj2GPfk',
1016                 'ext': 'mp4',
1017                 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1018                 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1019                 'upload_date': '20090125',
1020                 'uploader': 'Prochorowka',
1021                 'uploader_id': 'Prochorowka',
1022                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1023                 'artist': 'Panjabi MC',
1024                 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1025                 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1026             },
1027             'params': {
1028                 'skip_download': True,
1029             },
1030             'skip': 'Video unavailable',
1031         },
1032         {
1033             # empty description results in an empty string
1034             'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1035             'info_dict': {
1036                 'id': 'x41yOUIvK2k',
1037                 'ext': 'mp4',
1038                 'title': 'IMG 3456',
1039                 'description': '',
1040                 'upload_date': '20170613',
1041                 'uploader_id': 'ElevageOrVert',
1042                 'uploader': 'ElevageOrVert',
1043             },
1044             'params': {
1045                 'skip_download': True,
1046             },
1047         },
1048         {
1049             # with '};' inside yt initial data (see [1])
1050             # see [2] for an example with '};' inside ytInitialPlayerResponse
1051             # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1052             # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1053             'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1054             'info_dict': {
1055                 'id': 'CHqg6qOn4no',
1056                 'ext': 'mp4',
1057                 'title': 'Part 77   Sort a list of simple types in c#',
1058                 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1059                 'upload_date': '20130831',
1060                 'uploader_id': 'kudvenkat',
1061                 'uploader': 'kudvenkat',
1062             },
1063             'params': {
1064                 'skip_download': True,
1065             },
1066         },
1067         {
1068             # another example of '};' in ytInitialData
1069             'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1070             'only_matching': True,
1071         },
1072         {
1073             'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1074             'only_matching': True,
1075         },
1076         {
1077             # https://github.com/ytdl-org/youtube-dl/pull/28094
1078             'url': 'OtqTfy26tG0',
1079             'info_dict': {
1080                 'id': 'OtqTfy26tG0',
1081                 'ext': 'mp4',
1082                 'title': 'Burn Out',
1083                 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1084                 'upload_date': '20141120',
1085                 'uploader': 'The Cinematic Orchestra - Topic',
1086                 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1087                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1088                 'artist': 'The Cinematic Orchestra',
1089                 'track': 'Burn Out',
1090                 'album': 'Every Day',
1091                 'release_data': None,
1092                 'release_year': None,
1093             },
1094             'params': {
1095                 'skip_download': True,
1096             },
1097         },
1098         {
1099             # controversial video, only works with bpctr when authenticated with cookies
1100             'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1101             'only_matching': True,
1102         },
1103         {
1104             # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1105             'url': 'cBvYw8_A0vQ',
1106             'info_dict': {
1107                 'id': 'cBvYw8_A0vQ',
1108                 'ext': 'mp4',
1109                 'title': '4K Ueno Okachimachi  Street  Scenes  上野御徒町歩き',
1110                 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1111                 'upload_date': '20201120',
1112                 'uploader': 'Walk around Japan',
1113                 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1114                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1115             },
1116             'params': {
1117                 'skip_download': True,
1118             },
1119         },
1120     ]
    # Static table of known properties per format, keyed by itag given as a
    # string (plus the special '_rtmp' key). Each value lists only the fields
    # that are fixed for that itag; fields that differ from video to video
    # are left out on purpose (see the notes on itags 36, 138 and 272).
    # NOTE(review): the negative 'preference' values on the 3D and HLS
    # entries presumably rank them below the regular formats - confirm
    # against the format sorting code.
    _formats = {
        # Legacy formats carrying both audio and video
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


        # 3D videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video (video only: these entries declare no 'acodec')
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio (audio only: these entries declare no 'vcodec')
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm (vp8 entries first, then vp9)
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    }
1226
1227     @classmethod
1228     def suitable(cls, url):
1229         # Hack for lazy extractors until more generic solution is implemented
1230         # (see #28780)
1231         from .youtube import parse_qs
1232         qs = parse_qs(url)
1233         if qs.get('list', [None])[0]:
1234             return False
1235         return super(YoutubeIE, cls).suitable(url)
1236
1237     def __init__(self, *args, **kwargs):
1238         super(YoutubeIE, self).__init__(*args, **kwargs)
1239         self._code_cache = {}
1240         self._player_cache = {}
1241
1242     def _signature_cache_id(self, example_sig):
1243         """ Return a string representation of a signature """
1244         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1245
1246     @classmethod
1247     def _extract_player_info(cls, player_url):
1248         for player_re in cls._PLAYER_INFO_RE:
1249             id_m = re.search(player_re, player_url)
1250             if id_m:
1251                 break
1252         else:
1253             raise ExtractorError('Cannot identify player %r' % player_url)
1254         return id_m.group('id')
1255
1256     def _extract_signature_function(self, video_id, player_url, example_sig):
1257         player_id = self._extract_player_info(player_url)
1258
1259         # Read from filesystem cache
1260         func_id = 'js_%s_%s' % (
1261             player_id, self._signature_cache_id(example_sig))
1262         assert os.path.basename(func_id) == func_id
1263
1264         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1265         if cache_spec is not None:
1266             return lambda s: ''.join(s[i] for i in cache_spec)
1267
1268         if player_id not in self._code_cache:
1269             self._code_cache[player_id] = self._download_webpage(
1270                 player_url, video_id,
1271                 note='Downloading player ' + player_id,
1272                 errnote='Download of %s failed' % player_url)
1273         code = self._code_cache[player_id]
1274         res = self._parse_sig_js(code)
1275
1276         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1277         cache_res = res(test_string)
1278         cache_spec = [ord(c) for c in cache_res]
1279
1280         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1281         return res
1282
1283     def _print_sig_code(self, func, example_sig):
1284         def gen_sig_code(idxs):
1285             def _genslice(start, end, step):
1286                 starts = '' if start == 0 else str(start)
1287                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1288                 steps = '' if step == 1 else (':%d' % step)
1289                 return 's[%s%s%s]' % (starts, ends, steps)
1290
1291             step = None
1292             # Quelch pyflakes warnings - start will be set when step is set
1293             start = '(Never used)'
1294             for i, prev in zip(idxs[1:], idxs[:-1]):
1295                 if step is not None:
1296                     if i - prev == step:
1297                         continue
1298                     yield _genslice(start, prev, step)
1299                     step = None
1300                     continue
1301                 if i - prev in [-1, 1]:
1302                     step = i - prev
1303                     start = prev
1304                     continue
1305                 else:
1306                     yield 's[%d]' % prev
1307             if step is None:
1308                 yield 's[%d]' % i
1309             else:
1310                 yield _genslice(start, i, step)
1311
1312         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1313         cache_res = func(test_string)
1314         cache_spec = [ord(c) for c in cache_res]
1315         expr_code = ' + '.join(gen_sig_code(cache_spec))
1316         signature_id_tuple = '(%s)' % (
1317             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1318         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1319                 '    return %s\n') % (signature_id_tuple, expr_code)
1320         self.to_screen('Extracted signature function:\n' + code)
1321
1322     def _parse_sig_js(self, jscode):
1323         funcname = self._search_regex(
1324             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1325              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1326              r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
1327              r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
1328              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
1329              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1330              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1331              # Obsolete patterns
1332              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1333              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1334              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1335              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1336              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1337              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1338              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1339              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1340             jscode, 'Initial JS player signature function name', group='sig')
1341
1342         jsi = JSInterpreter(jscode)
1343         initial_function = jsi.extract_function(funcname)
1344         return lambda s: initial_function([s])
1345
1346     def _decrypt_signature(self, s, video_id, player_url):
1347         """Turn the encrypted s field into a working signature"""
1348
1349         if player_url is None:
1350             raise ExtractorError('Cannot decrypt signature without player_url')
1351
1352         if player_url.startswith('//'):
1353             player_url = 'https:' + player_url
1354         elif not re.match(r'https?://', player_url):
1355             player_url = compat_urlparse.urljoin(
1356                 'https://www.youtube.com', player_url)
1357         try:
1358             player_id = (player_url, self._signature_cache_id(s))
1359             if player_id not in self._player_cache:
1360                 func = self._extract_signature_function(
1361                     video_id, player_url, s
1362                 )
1363                 self._player_cache[player_id] = func
1364             func = self._player_cache[player_id]
1365             if self._downloader.params.get('youtube_print_sig_code'):
1366                 self._print_sig_code(func, s)
1367             return func(s)
1368         except Exception as e:
1369             tb = traceback.format_exc()
1370             raise ExtractorError(
1371                 'Signature extraction failed: ' + tb, cause=e)
1372
1373     def _mark_watched(self, video_id, player_response):
1374         playback_url = url_or_none(try_get(
1375             player_response,
1376             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
1377         if not playback_url:
1378             return
1379         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1380         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1381
1382         # cpn generation algorithm is reverse engineered from base.js.
1383         # In fact it works even with dummy cpn.
1384         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1385         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1386
1387         qs.update({
1388             'ver': ['2'],
1389             'cpn': [cpn],
1390         })
1391         playback_url = compat_urlparse.urlunparse(
1392             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1393
1394         self._download_webpage(
1395             playback_url, video_id, 'Marking watched',
1396             'Unable to mark watched', fatal=False)
1397
1398     @staticmethod
1399     def _extract_urls(webpage):
1400         # Embedded YouTube player
1401         entries = [
1402             unescapeHTML(mobj.group('url'))
1403             for mobj in re.finditer(r'''(?x)
1404             (?:
1405                 <iframe[^>]+?src=|
1406                 data-video-url=|
1407                 <embed[^>]+?src=|
1408                 embedSWF\(?:\s*|
1409                 <object[^>]+data=|
1410                 new\s+SWFObject\(
1411             )
1412             (["\'])
1413                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1414                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1415             \1''', webpage)]
1416
1417         # lazyYT YouTube embed
1418         entries.extend(list(map(
1419             unescapeHTML,
1420             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1421
1422         # Wordpress "YouTube Video Importer" plugin
1423         matches = re.findall(r'''(?x)<div[^>]+
1424             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1425             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1426         entries.extend(m[-1] for m in matches)
1427
1428         return entries
1429
1430     @staticmethod
1431     def _extract_url(webpage):
1432         urls = YoutubeIE._extract_urls(webpage)
1433         return urls[0] if urls else None
1434
1435     @classmethod
1436     def extract_id(cls, url):
1437         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1438         if mobj is None:
1439             raise ExtractorError('Invalid URL: %s' % url)
1440         video_id = mobj.group(2)
1441         return video_id
1442
1443     def _extract_chapters_from_json(self, data, video_id, duration):
1444         chapters_list = try_get(
1445             data,
1446             lambda x: x['playerOverlays']
1447                        ['playerOverlayRenderer']
1448                        ['decoratedPlayerBarRenderer']
1449                        ['decoratedPlayerBarRenderer']
1450                        ['playerBar']
1451                        ['chapteredPlayerBarRenderer']
1452                        ['chapters'],
1453             list)
1454         if not chapters_list:
1455             return
1456
1457         def chapter_time(chapter):
1458             return float_or_none(
1459                 try_get(
1460                     chapter,
1461                     lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1462                     int),
1463                 scale=1000)
1464         chapters = []
1465         for next_num, chapter in enumerate(chapters_list, start=1):
1466             start_time = chapter_time(chapter)
1467             if start_time is None:
1468                 continue
1469             end_time = (chapter_time(chapters_list[next_num])
1470                         if next_num < len(chapters_list) else duration)
1471             if end_time is None:
1472                 continue
1473             title = try_get(
1474                 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1475                 compat_str)
1476             chapters.append({
1477                 'start_time': start_time,
1478                 'end_time': end_time,
1479                 'title': title,
1480             })
1481         return chapters
1482
1483     def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1484         return self._parse_json(self._search_regex(
1485             (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1486              regex), webpage, name, default='{}'), video_id, fatal=False)
1487
1488     def _real_extract(self, url):
1489         url, smuggled_data = unsmuggle_url(url, {})
1490         video_id = self._match_id(url)
1491         base_url = self.http_scheme() + '//www.youtube.com/'
1492         webpage_url = base_url + 'watch?v=' + video_id
1493         webpage = self._download_webpage(
1494             webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
1495
1496         player_response = None
1497         if webpage:
1498             player_response = self._extract_yt_initial_variable(
1499                 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1500                 video_id, 'initial player response')
1501         if not player_response:
1502             player_response = self._call_api(
1503                 'player', {'videoId': video_id}, video_id)
1504
1505         playability_status = player_response.get('playabilityStatus') or {}
1506         if playability_status.get('reason') == 'Sign in to confirm your age':
1507             pr = self._parse_json(try_get(compat_parse_qs(
1508                 self._download_webpage(
1509                     base_url + 'get_video_info', video_id,
1510                     'Refetching age-gated info webpage',
1511                     'unable to download video info webpage', query={
1512                         'video_id': video_id,
1513                         'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1514                         'html5': 1,
1515                     }, fatal=False)),
1516                 lambda x: x['player_response'][0],
1517                 compat_str) or '{}', video_id)
1518             if pr:
1519                 player_response = pr
1520
1521         trailer_video_id = try_get(
1522             playability_status,
1523             lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1524             compat_str)
1525         if trailer_video_id:
1526             return self.url_result(
1527                 trailer_video_id, self.ie_key(), trailer_video_id)
1528
1529         def get_text(x):
1530             if not x:
1531                 return
1532             text = x.get('simpleText')
1533             if text and isinstance(text, compat_str):
1534                 return text
1535             runs = x.get('runs')
1536             if not isinstance(runs, list):
1537                 return
1538             return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
1539
1540         search_meta = (
1541             lambda x: self._html_search_meta(x, webpage, default=None)) \
1542             if webpage else lambda x: None
1543
1544         video_details = player_response.get('videoDetails') or {}
1545         microformat = try_get(
1546             player_response,
1547             lambda x: x['microformat']['playerMicroformatRenderer'],
1548             dict) or {}
1549         video_title = video_details.get('title') \
1550             or get_text(microformat.get('title')) \
1551             or search_meta(['og:title', 'twitter:title', 'title'])
1552         video_description = video_details.get('shortDescription')
1553
1554         if not smuggled_data.get('force_singlefeed', False):
1555             if not self._downloader.params.get('noplaylist'):
1556                 multifeed_metadata_list = try_get(
1557                     player_response,
1558                     lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1559                     compat_str)
1560                 if multifeed_metadata_list:
1561                     entries = []
1562                     feed_ids = []
1563                     for feed in multifeed_metadata_list.split(','):
1564                         # Unquote should take place before split on comma (,) since textual
1565                         # fields may contain comma as well (see
1566                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
1567                         feed_data = compat_parse_qs(
1568                             compat_urllib_parse_unquote_plus(feed))
1569
1570                         def feed_entry(name):
1571                             return try_get(
1572                                 feed_data, lambda x: x[name][0], compat_str)
1573
1574                         feed_id = feed_entry('id')
1575                         if not feed_id:
1576                             continue
1577                         feed_title = feed_entry('title')
1578                         title = video_title
1579                         if feed_title:
1580                             title += ' (%s)' % feed_title
1581                         entries.append({
1582                             '_type': 'url_transparent',
1583                             'ie_key': 'Youtube',
1584                             'url': smuggle_url(
1585                                 base_url + 'watch?v=' + feed_data['id'][0],
1586                                 {'force_singlefeed': True}),
1587                             'title': title,
1588                         })
1589                         feed_ids.append(feed_id)
1590                     self.to_screen(
1591                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1592                         % (', '.join(feed_ids), video_id))
1593                     return self.playlist_result(
1594                         entries, video_id, video_title, video_description)
1595             else:
1596                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1597
1598         formats = []
1599         itags = []
1600         itag_qualities = {}
1601         player_url = None
1602         q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
1603         streaming_data = player_response.get('streamingData') or {}
1604         streaming_formats = streaming_data.get('formats') or []
1605         streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
1606         for fmt in streaming_formats:
1607             if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
1608                 continue
1609
1610             itag = str_or_none(fmt.get('itag'))
1611             quality = fmt.get('quality')
1612             if itag and quality:
1613                 itag_qualities[itag] = quality
1614             # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
1615             # (adding `&sq=0` to the URL) and parsing emsg box to determine the
1616             # number of fragment that would subsequently requested with (`&sq=N`)
1617             if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
1618                 continue
1619
1620             fmt_url = fmt.get('url')
1621             if not fmt_url:
1622                 sc = compat_parse_qs(fmt.get('signatureCipher'))
1623                 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
1624                 encrypted_sig = try_get(sc, lambda x: x['s'][0])
1625                 if not (sc and fmt_url and encrypted_sig):
1626                     continue
1627                 if not player_url:
1628                     if not webpage:
1629                         continue
1630                     player_url = self._search_regex(
1631                         r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1632                         webpage, 'player URL', fatal=False)
1633                 if not player_url:
1634                     continue
1635                 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
1636                 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
1637                 fmt_url += '&' + sp + '=' + signature
1638
1639             if itag:
1640                 itags.append(itag)
1641             tbr = float_or_none(
1642                 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
1643             dct = {
1644                 'asr': int_or_none(fmt.get('audioSampleRate')),
1645                 'filesize': int_or_none(fmt.get('contentLength')),
1646                 'format_id': itag,
1647                 'format_note': fmt.get('qualityLabel') or quality,
1648                 'fps': int_or_none(fmt.get('fps')),
1649                 'height': int_or_none(fmt.get('height')),
1650                 'quality': q(quality),
1651                 'tbr': tbr,
1652                 'url': fmt_url,
1653                 'width': fmt.get('width'),
1654             }
1655             mimetype = fmt.get('mimeType')
1656             if mimetype:
1657                 mobj = re.match(
1658                     r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
1659                 if mobj:
1660                     dct['ext'] = mimetype2ext(mobj.group(1))
1661                     dct.update(parse_codecs(mobj.group(2)))
1662             no_audio = dct.get('acodec') == 'none'
1663             no_video = dct.get('vcodec') == 'none'
1664             if no_audio:
1665                 dct['vbr'] = tbr
1666             if no_video:
1667                 dct['abr'] = tbr
1668             if no_audio or no_video:
1669                 dct['downloader_options'] = {
1670                     # Youtube throttles chunks >~10M
1671                     'http_chunk_size': 10485760,
1672                 }
1673                 if dct.get('ext'):
1674                     dct['container'] = dct['ext'] + '_dash'
1675             formats.append(dct)
1676
1677         hls_manifest_url = streaming_data.get('hlsManifestUrl')
1678         if hls_manifest_url:
1679             for f in self._extract_m3u8_formats(
1680                     hls_manifest_url, video_id, 'mp4', fatal=False):
1681                 itag = self._search_regex(
1682                     r'/itag/(\d+)', f['url'], 'itag', default=None)
1683                 if itag:
1684                     f['format_id'] = itag
1685                 formats.append(f)
1686
1687         if self._downloader.params.get('youtube_include_dash_manifest', True):
1688             dash_manifest_url = streaming_data.get('dashManifestUrl')
1689             if dash_manifest_url:
1690                 for f in self._extract_mpd_formats(
1691                         dash_manifest_url, video_id, fatal=False):
1692                     itag = f['format_id']
1693                     if itag in itags:
1694                         continue
1695                     if itag in itag_qualities:
1696                         f['quality'] = q(itag_qualities[itag])
1697                     filesize = int_or_none(self._search_regex(
1698                         r'/clen/(\d+)', f.get('fragment_base_url')
1699                         or f['url'], 'file size', default=None))
1700                     if filesize:
1701                         f['filesize'] = filesize
1702                     formats.append(f)
1703
1704         if not formats:
1705             if streaming_data.get('licenseInfos'):
1706                 raise ExtractorError(
1707                     'This video is DRM protected.', expected=True)
1708             pemr = try_get(
1709                 playability_status,
1710                 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
1711                 dict) or {}
1712             reason = get_text(pemr.get('reason')) or playability_status.get('reason')
1713             subreason = pemr.get('subreason')
1714             if subreason:
1715                 subreason = clean_html(get_text(subreason))
1716                 if subreason == 'The uploader has not made this video available in your country.':
1717                     countries = microformat.get('availableCountries')
1718                     if not countries:
1719                         regions_allowed = search_meta('regionsAllowed')
1720                         countries = regions_allowed.split(',') if regions_allowed else None
1721                     self.raise_geo_restricted(
1722                         subreason, countries)
1723                 reason += '\n' + subreason
1724             if reason:
1725                 raise ExtractorError(reason, expected=True)
1726
1727         self._sort_formats(formats)
1728
1729         keywords = video_details.get('keywords') or []
1730         if not keywords and webpage:
1731             keywords = [
1732                 unescapeHTML(m.group('content'))
1733                 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
1734         for keyword in keywords:
1735             if keyword.startswith('yt:stretch='):
1736                 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
1737                 if mobj:
1738                     # NB: float is intentional for forcing float division
1739                     w, h = (float(v) for v in mobj.groups())
1740                     if w > 0 and h > 0:
1741                         ratio = w / h
1742                         for f in formats:
1743                             if f.get('vcodec') != 'none':
1744                                 f['stretched_ratio'] = ratio
1745                         break
1746
1747         thumbnails = []
1748         for container in (video_details, microformat):
1749             for thumbnail in (try_get(
1750                     container,
1751                     lambda x: x['thumbnail']['thumbnails'], list) or []):
1752                 thumbnail_url = thumbnail.get('url')
1753                 if not thumbnail_url:
1754                     continue
1755                 thumbnails.append({
1756                     'height': int_or_none(thumbnail.get('height')),
1757                     'url': thumbnail_url,
1758                     'width': int_or_none(thumbnail.get('width')),
1759                 })
1760             if thumbnails:
1761                 break
1762         else:
1763             thumbnail = search_meta(['og:image', 'twitter:image'])
1764             if thumbnail:
1765                 thumbnails = [{'url': thumbnail}]
1766
1767         category = microformat.get('category') or search_meta('genre')
1768         channel_id = video_details.get('channelId') \
1769             or microformat.get('externalChannelId') \
1770             or search_meta('channelId')
1771         duration = int_or_none(
1772             video_details.get('lengthSeconds')
1773             or microformat.get('lengthSeconds')) \
1774             or parse_duration(search_meta('duration'))
1775         is_live = video_details.get('isLive')
1776         owner_profile_url = microformat.get('ownerProfileUrl')
1777
1778         info = {
1779             'id': video_id,
1780             'title': self._live_title(video_title) if is_live else video_title,
1781             'formats': formats,
1782             'thumbnails': thumbnails,
1783             'description': video_description,
1784             'upload_date': unified_strdate(
1785                 microformat.get('uploadDate')
1786                 or search_meta('uploadDate')),
1787             'uploader': video_details['author'],
1788             'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
1789             'uploader_url': owner_profile_url,
1790             'channel_id': channel_id,
1791             'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
1792             'duration': duration,
1793             'view_count': int_or_none(
1794                 video_details.get('viewCount')
1795                 or microformat.get('viewCount')
1796                 or search_meta('interactionCount')),
1797             'average_rating': float_or_none(video_details.get('averageRating')),
1798             'age_limit': 18 if (
1799                 microformat.get('isFamilySafe') is False
1800                 or search_meta('isFamilyFriendly') == 'false'
1801                 or search_meta('og:restrictions:age') == '18+') else 0,
1802             'webpage_url': webpage_url,
1803             'categories': [category] if category else None,
1804             'tags': keywords,
1805             'is_live': is_live,
1806         }
1807
1808         pctr = try_get(
1809             player_response,
1810             lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
1811         if pctr:
1812             def process_language(container, base_url, lang_code, query):
1813                 lang_subs = []
1814                 for fmt in self._SUBTITLE_FORMATS:
1815                     query.update({
1816                         'fmt': fmt,
1817                     })
1818                     lang_subs.append({
1819                         'ext': fmt,
1820                         'url': update_url_query(base_url, query),
1821                     })
1822                 container[lang_code] = lang_subs
1823
1824             subtitles = {}
1825             for caption_track in (pctr.get('captionTracks') or []):
1826                 base_url = caption_track.get('baseUrl')
1827                 if not base_url:
1828                     continue
1829                 if caption_track.get('kind') != 'asr':
1830                     lang_code = caption_track.get('languageCode')
1831                     if not lang_code:
1832                         continue
1833                     process_language(
1834                         subtitles, base_url, lang_code, {})
1835                     continue
1836                 automatic_captions = {}
1837                 for translation_language in (pctr.get('translationLanguages') or []):
1838                     translation_language_code = translation_language.get('languageCode')
1839                     if not translation_language_code:
1840                         continue
1841                     process_language(
1842                         automatic_captions, base_url, translation_language_code,
1843                         {'tlang': translation_language_code})
1844                 info['automatic_captions'] = automatic_captions
1845             info['subtitles'] = subtitles
1846
1847         parsed_url = compat_urllib_parse_urlparse(url)
1848         for component in [parsed_url.fragment, parsed_url.query]:
1849             query = compat_parse_qs(component)
1850             for k, v in query.items():
1851                 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
1852                     d_k += '_time'
1853                     if d_k not in info and k in s_ks:
1854                         info[d_k] = parse_duration(query[k][0])
1855
1856         if video_description:
1857             mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
1858             if mobj:
1859                 release_year = mobj.group('release_year')
1860                 release_date = mobj.group('release_date')
1861                 if release_date:
1862                     release_date = release_date.replace('-', '')
1863                     if not release_year:
1864                         release_year = release_date[:4]
1865                 info.update({
1866                     'album': mobj.group('album'.strip()),
1867                     'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
1868                     'track': mobj.group('track').strip(),
1869                     'release_date': release_date,
1870                     'release_year': int_or_none(release_year),
1871                 })
1872
1873         initial_data = None
1874         if webpage:
1875             initial_data = self._extract_yt_initial_variable(
1876                 webpage, self._YT_INITIAL_DATA_RE, video_id,
1877                 'yt initial data')
1878         if not initial_data:
1879             initial_data = self._call_api(
1880                 'next', {'videoId': video_id}, video_id, fatal=False)
1881
1882         if initial_data:
1883             chapters = self._extract_chapters_from_json(
1884                 initial_data, video_id, duration)
1885             if not chapters:
1886                 for engagment_pannel in (initial_data.get('engagementPanels') or []):
1887                     contents = try_get(
1888                         engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
1889                         list)
1890                     if not contents:
1891                         continue
1892
1893                     def chapter_time(mmlir):
1894                         return parse_duration(
1895                             get_text(mmlir.get('timeDescription')))
1896
1897                     chapters = []
1898                     for next_num, content in enumerate(contents, start=1):
1899                         mmlir = content.get('macroMarkersListItemRenderer') or {}
1900                         start_time = chapter_time(mmlir)
1901                         end_time = chapter_time(try_get(
1902                             contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
1903                             if next_num < len(contents) else duration
1904                         if start_time is None or end_time is None:
1905                             continue
1906                         chapters.append({
1907                             'start_time': start_time,
1908                             'end_time': end_time,
1909                             'title': get_text(mmlir.get('title')),
1910                         })
1911                     if chapters:
1912                         break
1913             if chapters:
1914                 info['chapters'] = chapters
1915
1916             contents = try_get(
1917                 initial_data,
1918                 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
1919                 list) or []
1920             for content in contents:
1921                 vpir = content.get('videoPrimaryInfoRenderer')
1922                 if vpir:
1923                     stl = vpir.get('superTitleLink')
1924                     if stl:
1925                         stl = get_text(stl)
1926                         if try_get(
1927                                 vpir,
1928                                 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
1929                             info['location'] = stl
1930                         else:
1931                             mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
1932                             if mobj:
1933                                 info.update({
1934                                     'series': mobj.group(1),
1935                                     'season_number': int(mobj.group(2)),
1936                                     'episode_number': int(mobj.group(3)),
1937                                 })
1938                     for tlb in (try_get(
1939                             vpir,
1940                             lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
1941                             list) or []):
1942                         tbr = tlb.get('toggleButtonRenderer') or {}
1943                         for getter, regex in [(
1944                                 lambda x: x['defaultText']['accessibility']['accessibilityData'],
1945                                 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
1946                                     lambda x: x['accessibility'],
1947                                     lambda x: x['accessibilityData']['accessibilityData'],
1948                                 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
1949                             label = (try_get(tbr, getter, dict) or {}).get('label')
1950                             if label:
1951                                 mobj = re.match(regex, label)
1952                                 if mobj:
1953                                     info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
1954                                     break
1955                     sbr_tooltip = try_get(
1956                         vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
1957                     if sbr_tooltip:
1958                         like_count, dislike_count = sbr_tooltip.split(' / ')
1959                         info.update({
1960                             'like_count': str_to_int(like_count),
1961                             'dislike_count': str_to_int(dislike_count),
1962                         })
1963                 vsir = content.get('videoSecondaryInfoRenderer')
1964                 if vsir:
1965                     info['channel'] = get_text(try_get(
1966                         vsir,
1967                         lambda x: x['owner']['videoOwnerRenderer']['title'],
1968                         dict))
1969                     rows = try_get(
1970                         vsir,
1971                         lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
1972                         list) or []
1973                     multiple_songs = False
1974                     for row in rows:
1975                         if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
1976                             multiple_songs = True
1977                             break
1978                     for row in rows:
1979                         mrr = row.get('metadataRowRenderer') or {}
1980                         mrr_title = mrr.get('title')
1981                         if not mrr_title:
1982                             continue
1983                         mrr_title = get_text(mrr['title'])
1984                         mrr_contents_text = get_text(mrr['contents'][0])
1985                         if mrr_title == 'License':
1986                             info['license'] = mrr_contents_text
1987                         elif not multiple_songs:
1988                             if mrr_title == 'Album':
1989                                 info['album'] = mrr_contents_text
1990                             elif mrr_title == 'Artist':
1991                                 info['artist'] = mrr_contents_text
1992                             elif mrr_title == 'Song':
1993                                 info['track'] = mrr_contents_text
1994
1995         for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
1996             v = info.get(s_k)
1997             if v:
1998                 info[d_k] = v
1999
2000         self.mark_watched(video_id, player_response)
2001
2002         return info
2003
2004
2005 class YoutubeTabIE(YoutubeBaseInfoExtractor):
    # Description and URL pattern for the tab extractor.
    IE_DESC = 'YouTube.com tab'
    # Verbose-mode pattern. It accepts http(s) URLs on youtube.com,
    # youtubekids.com or invidio.us (with an optional single subdomain such
    # as "www." or "music.") whose path is one of:
    #   * a channel/, c/, user/, feed/ or hashtag/ prefix followed by the ID,
    #   * a playlist or watch page carrying a "list=" query parameter, in
    #     which case the ID is the list value,
    #   * any other first path segment, unless it is exactly watch, embed,
    #     v or e.
    # The "id" group stops at the first "/", "?", "#" or "&".
    _VALID_URL = r'''(?x)
                    https?://
                        (?:\w+\.)?
                        (?:
                            youtube(?:kids)?\.com|
                            invidio\.us
                        )/
                        (?:
                            (?:channel|c|user|feed|hashtag)/|
                            (?:playlist|watch)\?.*?\blist=|
                            (?!(?:watch|embed|v|e)\b)
                        )
                        (?P<id>[^/?\#&]+)
                    '''
    # Extractor name.
    IE_NAME = 'youtube:tab'
2022
2023     _TESTS = [{
2024         # playlists, multipage
2025         'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2026         'playlist_mincount': 94,
2027         'info_dict': {
2028             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2029             'title': 'Игорь Клейнер - Playlists',
2030             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2031         },
2032     }, {
2033         # playlists, multipage, different order
2034         'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2035         'playlist_mincount': 94,
2036         'info_dict': {
2037             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2038             'title': 'Игорь Клейнер - Playlists',
2039             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2040         },
2041     }, {
2042         # playlists, series
2043         'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2044         'playlist_mincount': 5,
2045         'info_dict': {
2046             'id': 'UCYO_jab_esuFRV4b17AJtAw',
2047             'title': '3Blue1Brown - Playlists',
2048             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
2049         },
2050     }, {
2051         # playlists, singlepage
2052         'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2053         'playlist_mincount': 4,
2054         'info_dict': {
2055             'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2056             'title': 'ThirstForScience - Playlists',
2057             'description': 'md5:609399d937ea957b0f53cbffb747a14c',
2058         }
2059     }, {
2060         'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2061         'only_matching': True,
2062     }, {
2063         # basic, single video playlist
2064         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2065         'info_dict': {
2066             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2067             'uploader': 'Sergey M.',
2068             'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2069             'title': 'youtube-dl public playlist',
2070         },
2071         'playlist_count': 1,
2072     }, {
2073         # empty playlist
2074         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2075         'info_dict': {
2076             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2077             'uploader': 'Sergey M.',
2078             'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2079             'title': 'youtube-dl empty playlist',
2080         },
2081         'playlist_count': 0,
2082     }, {
2083         # Home tab
2084         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
2085         'info_dict': {
2086             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2087             'title': 'lex will - Home',
2088             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2089         },
2090         'playlist_mincount': 2,
2091     }, {
2092         # Videos tab
2093         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
2094         'info_dict': {
2095             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2096             'title': 'lex will - Videos',
2097             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2098         },
2099         'playlist_mincount': 975,
2100     }, {
2101         # Videos tab, sorted by popular
2102         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
2103         'info_dict': {
2104             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2105             'title': 'lex will - Videos',
2106             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2107         },
2108         'playlist_mincount': 199,
2109     }, {
2110         # Playlists tab
2111         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
2112         'info_dict': {
2113             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2114             'title': 'lex will - Playlists',
2115             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2116         },
2117         'playlist_mincount': 17,
2118     }, {
2119         # Community tab
2120         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
2121         'info_dict': {
2122             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2123             'title': 'lex will - Community',
2124             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2125         },
2126         'playlist_mincount': 18,
2127     }, {
2128         # Channels tab
2129         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
2130         'info_dict': {
2131             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2132             'title': 'lex will - Channels',
2133             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2134         },
2135         'playlist_mincount': 138,
2136     }, {
2137         'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2138         'only_matching': True,
2139     }, {
2140         'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2141         'only_matching': True,
2142     }, {
2143         'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2144         'only_matching': True,
2145     }, {
2146         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2147         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2148         'info_dict': {
2149             'title': '29C3: Not my department',
2150             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2151             'uploader': 'Christiaan008',
2152             'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
2153         },
2154         'playlist_count': 96,
2155     }, {
2156         'note': 'Large playlist',
2157         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2158         'info_dict': {
2159             'title': 'Uploads from Cauchemar',
2160             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2161             'uploader': 'Cauchemar',
2162             'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
2163         },
2164         'playlist_mincount': 1123,
2165     }, {
2166         # even larger playlist, 8832 videos
2167         'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2168         'only_matching': True,
2169     }, {
2170         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2171         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2172         'info_dict': {
2173             'title': 'Uploads from Interstellar Movie',
2174             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2175             'uploader': 'Interstellar Movie',
2176             'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
2177         },
2178         'playlist_mincount': 21,
2179     }, {
2180         # https://github.com/ytdl-org/youtube-dl/issues/21844
2181         'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2182         'info_dict': {
2183             'title': 'Data Analysis with Dr Mike Pound',
2184             'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2185             'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2186             'uploader': 'Computerphile',
2187         },
2188         'playlist_mincount': 11,
2189     }, {
2190         'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2191         'only_matching': True,
2192     }, {
2193         # Playlist URL that does not actually serve a playlist
2194         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2195         'info_dict': {
2196             'id': 'FqZTN594JQw',
2197             'ext': 'webm',
2198             'title': "Smiley's People 01 detective, Adventure Series, Action",
2199             'uploader': 'STREEM',
2200             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2201             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2202             'upload_date': '20150526',
2203             'license': 'Standard YouTube License',
2204             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2205             'categories': ['People & Blogs'],
2206             'tags': list,
2207             'view_count': int,
2208             'like_count': int,
2209             'dislike_count': int,
2210         },
2211         'params': {
2212             'skip_download': True,
2213         },
2214         'skip': 'This video is not available.',
2215         'add_ie': [YoutubeIE.ie_key()],
2216     }, {
2217         'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2218         'only_matching': True,
2219     }, {
2220         'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
2221         'only_matching': True,
2222     }, {
2223         'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2224         'info_dict': {
2225             'id': '9Auq9mYxFEE',
2226             'ext': 'mp4',
2227             'title': 'Watch Sky News live',
2228             'uploader': 'Sky News',
2229             'uploader_id': 'skynews',
2230             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2231             'upload_date': '20191102',
2232             'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
2233             'categories': ['News & Politics'],
2234             'tags': list,
2235             'like_count': int,
2236             'dislike_count': int,
2237         },
2238         'params': {
2239             'skip_download': True,
2240         },
2241     }, {
2242         'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2243         'info_dict': {
2244             'id': 'a48o2S1cPoo',
2245             'ext': 'mp4',
2246             'title': 'The Young Turks - Live Main Show',
2247             'uploader': 'The Young Turks',
2248             'uploader_id': 'TheYoungTurks',
2249             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2250             'upload_date': '20150715',
2251             'license': 'Standard YouTube License',
2252             'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2253             'categories': ['News & Politics'],
2254             'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2255             'like_count': int,
2256             'dislike_count': int,
2257         },
2258         'params': {
2259             'skip_download': True,
2260         },
2261         'only_matching': True,
2262     }, {
2263         'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2264         'only_matching': True,
2265     }, {
2266         'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2267         'only_matching': True,
2268     }, {
2269         'url': 'https://www.youtube.com/feed/trending',
2270         'only_matching': True,
2271     }, {
2272         # needs auth
2273         'url': 'https://www.youtube.com/feed/library',
2274         'only_matching': True,
2275     }, {
2276         # needs auth
2277         'url': 'https://www.youtube.com/feed/history',
2278         'only_matching': True,
2279     }, {
2280         # needs auth
2281         'url': 'https://www.youtube.com/feed/subscriptions',
2282         'only_matching': True,
2283     }, {
2284         # needs auth
2285         'url': 'https://www.youtube.com/feed/watch_later',
2286         'only_matching': True,
2287     }, {
2288         # no longer available?
2289         'url': 'https://www.youtube.com/feed/recommended',
2290         'only_matching': True,
2291     }, {
2292         # inline playlist with not always working continuations
2293         'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2294         'only_matching': True,
2295     }, {
2296         'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2297         'only_matching': True,
2298     }, {
2299         'url': 'https://www.youtube.com/course',
2300         'only_matching': True,
2301     }, {
2302         'url': 'https://www.youtube.com/zsecurity',
2303         'only_matching': True,
2304     }, {
2305         'url': 'http://www.youtube.com/NASAgovVideo/videos',
2306         'only_matching': True,
2307     }, {
2308         'url': 'https://www.youtube.com/TheYoungTurks/live',
2309         'only_matching': True,
2310     }, {
2311         'url': 'https://www.youtube.com/hashtag/cctv9',
2312         'info_dict': {
2313             'id': 'cctv9',
2314             'title': '#cctv9',
2315         },
2316         'playlist_mincount': 350,
2317     }, {
2318         'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
2319         'only_matching': True,
2320     }]
2321
2322     @classmethod
2323     def suitable(cls, url):
2324         return False if YoutubeIE.suitable(url) else super(
2325             YoutubeTabIE, cls).suitable(url)
2326
2327     def _extract_channel_id(self, webpage):
2328         channel_id = self._html_search_meta(
2329             'channelId', webpage, 'channel id', default=None)
2330         if channel_id:
2331             return channel_id
2332         channel_url = self._html_search_meta(
2333             ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2334              'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2335              'twitter:app:url:googleplay'), webpage, 'channel url')
2336         return self._search_regex(
2337             r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2338             channel_url, 'channel id')
2339
2340     @staticmethod
2341     def _extract_grid_item_renderer(item):
2342         assert isinstance(item, dict)
2343         for key, renderer in item.items():
2344             if not key.startswith('grid') or not key.endswith('Renderer'):
2345                 continue
2346             if not isinstance(renderer, dict):
2347                 continue
2348             return renderer
2349
2350     def _grid_entries(self, grid_renderer):
2351         for item in grid_renderer['items']:
2352             if not isinstance(item, dict):
2353                 continue
2354             renderer = self._extract_grid_item_renderer(item)
2355             if not isinstance(renderer, dict):
2356                 continue
2357             title = try_get(
2358                 renderer, (lambda x: x['title']['runs'][0]['text'],
2359                            lambda x: x['title']['simpleText']), compat_str)
2360             # playlist
2361             playlist_id = renderer.get('playlistId')
2362             if playlist_id:
2363                 yield self.url_result(
2364                     'https://www.youtube.com/playlist?list=%s' % playlist_id,
2365                     ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
2366                     video_title=title)
2367                 continue
2368             # video
2369             video_id = renderer.get('videoId')
2370             if video_id:
2371                 yield self._extract_video(renderer)
2372                 continue
2373             # channel
2374             channel_id = renderer.get('channelId')
2375             if channel_id:
2376                 title = try_get(
2377                     renderer, lambda x: x['title']['simpleText'], compat_str)
2378                 yield self.url_result(
2379                     'https://www.youtube.com/channel/%s' % channel_id,
2380                     ie=YoutubeTabIE.ie_key(), video_title=title)
2381                 continue
2382             # generic endpoint URL support
2383             ep_url = urljoin('https://www.youtube.com/', try_get(
2384                 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
2385                 compat_str))
2386             if ep_url:
2387                 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
2388                     if ie.suitable(ep_url):
2389                         yield self.url_result(
2390                             ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
2391                         break
2392
2393     def _shelf_entries_from_content(self, shelf_renderer):
2394         content = shelf_renderer.get('content')
2395         if not isinstance(content, dict):
2396             return
2397         renderer = content.get('gridRenderer')
2398         if renderer:
2399             # TODO: add support for nested playlists so each shelf is processed
2400             # as separate playlist
2401             # TODO: this includes only first N items
2402             for entry in self._grid_entries(renderer):
2403                 yield entry
2404         renderer = content.get('horizontalListRenderer')
2405         if renderer:
2406             # TODO
2407             pass
2408
2409     def _shelf_entries(self, shelf_renderer, skip_channels=False):
2410         ep = try_get(
2411             shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
2412             compat_str)
2413         shelf_url = urljoin('https://www.youtube.com', ep)
2414         if shelf_url:
2415             # Skipping links to another channels, note that checking for
2416             # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
2417             # will not work
2418             if skip_channels and '/channels?' in shelf_url:
2419                 return
2420             title = try_get(
2421                 shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2422             yield self.url_result(shelf_url, video_title=title)
2423         # Shelf may not contain shelf URL, fallback to extraction from content
2424         for entry in self._shelf_entries_from_content(shelf_renderer):
2425             yield entry
2426
2427     def _playlist_entries(self, video_list_renderer):
2428         for content in video_list_renderer['contents']:
2429             if not isinstance(content, dict):
2430                 continue
2431             renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2432             if not isinstance(renderer, dict):
2433                 continue
2434             video_id = renderer.get('videoId')
2435             if not video_id:
2436                 continue
2437             yield self._extract_video(renderer)
2438
2439     def _video_entry(self, video_renderer):
2440         video_id = video_renderer.get('videoId')
2441         if video_id:
2442             return self._extract_video(video_renderer)
2443
2444     def _post_thread_entries(self, post_thread_renderer):
2445         post_renderer = try_get(
2446             post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
2447         if not post_renderer:
2448             return
2449         # video attachment
2450         video_renderer = try_get(
2451             post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
2452         video_id = None
2453         if video_renderer:
2454             entry = self._video_entry(video_renderer)
2455             if entry:
2456                 yield entry
2457         # inline video links
2458         runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
2459         for run in runs:
2460             if not isinstance(run, dict):
2461                 continue
2462             ep_url = try_get(
2463                 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
2464             if not ep_url:
2465                 continue
2466             if not YoutubeIE.suitable(ep_url):
2467                 continue
2468             ep_video_id = YoutubeIE._match_id(ep_url)
2469             if video_id == ep_video_id:
2470                 continue
2471             yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id)
2472
2473     def _post_thread_continuation_entries(self, post_thread_continuation):
2474         contents = post_thread_continuation.get('contents')
2475         if not isinstance(contents, list):
2476             return
2477         for content in contents:
2478             renderer = content.get('backstagePostThreadRenderer')
2479             if not isinstance(renderer, dict):
2480                 continue
2481             for entry in self._post_thread_entries(renderer):
2482                 yield entry
2483
2484     def _rich_grid_entries(self, contents):
2485         for content in contents:
2486             video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
2487             if video_renderer:
2488                 entry = self._video_entry(video_renderer)
2489                 if entry:
2490                     yield entry
2491
2492     @staticmethod
2493     def _build_continuation_query(continuation, ctp=None):
2494         query = {
2495             'ctoken': continuation,
2496             'continuation': continuation,
2497         }
2498         if ctp:
2499             query['itct'] = ctp
2500         return query
2501
2502     @staticmethod
2503     def _extract_next_continuation_data(renderer):
2504         next_continuation = try_get(
2505             renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
2506         if not next_continuation:
2507             return
2508         continuation = next_continuation.get('continuation')
2509         if not continuation:
2510             return
2511         ctp = next_continuation.get('clickTrackingParams')
2512         return YoutubeTabIE._build_continuation_query(continuation, ctp)
2513
2514     @classmethod
2515     def _extract_continuation(cls, renderer):
2516         next_continuation = cls._extract_next_continuation_data(renderer)
2517         if next_continuation:
2518             return next_continuation
2519         contents = []
2520         for key in ('contents', 'items'):
2521             contents.extend(try_get(renderer, lambda x: x[key], list) or [])
2522         for content in contents:
2523             if not isinstance(content, dict):
2524                 continue
2525             continuation_ep = try_get(
2526                 content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
2527                 dict)
2528             if not continuation_ep:
2529                 continue
2530             continuation = try_get(
2531                 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
2532             if not continuation:
2533                 continue
2534             ctp = continuation_ep.get('clickTrackingParams')
2535             return YoutubeTabIE._build_continuation_query(continuation, ctp)
2536
2537     def _entries(self, tab, item_id, webpage):
2538         tab_content = try_get(tab, lambda x: x['content'], dict)
2539         if not tab_content:
2540             return
2541         slr_renderer = try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
2542         if slr_renderer:
2543             is_channels_tab = tab.get('title') == 'Channels'
2544             continuation = None
2545             slr_contents = try_get(slr_renderer, lambda x: x['contents'], list) or []
2546             for slr_content in slr_contents:
2547                 if not isinstance(slr_content, dict):
2548                     continue
2549                 is_renderer = try_get(slr_content, lambda x: x['itemSectionRenderer'], dict)
2550                 if not is_renderer:
2551                     continue
2552                 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
2553                 for isr_content in isr_contents:
2554                     if not isinstance(isr_content, dict):
2555                         continue
2556                     renderer = isr_content.get('playlistVideoListRenderer')
2557                     if renderer:
2558                         for entry in self._playlist_entries(renderer):
2559                             yield entry
2560                         continuation = self._extract_continuation(renderer)
2561                         continue
2562                     renderer = isr_content.get('gridRenderer')
2563                     if renderer:
2564                         for entry in self._grid_entries(renderer):
2565                             yield entry
2566                         continuation = self._extract_continuation(renderer)
2567                         continue
2568                     renderer = isr_content.get('shelfRenderer')
2569                     if renderer:
2570                         for entry in self._shelf_entries(renderer, not is_channels_tab):
2571                             yield entry
2572                         continue
2573                     renderer = isr_content.get('backstagePostThreadRenderer')
2574                     if renderer:
2575                         for entry in self._post_thread_entries(renderer):
2576                             yield entry
2577                         continuation = self._extract_continuation(renderer)
2578                         continue
2579                     renderer = isr_content.get('videoRenderer')
2580                     if renderer:
2581                         entry = self._video_entry(renderer)
2582                         if entry:
2583                             yield entry
2584
2585                 if not continuation:
2586                     continuation = self._extract_continuation(is_renderer)
2587             if not continuation:
2588                 continuation = self._extract_continuation(slr_renderer)
2589         else:
2590             rich_grid_renderer = tab_content.get('richGridRenderer')
2591             if not rich_grid_renderer:
2592                 return
2593             for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []):
2594                 yield entry
2595             continuation = self._extract_continuation(rich_grid_renderer)
2596
2597         ytcfg = self._extract_ytcfg(item_id, webpage)
2598         client_version = try_get(
2599             ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or '2.20210407.08.00'
2600
2601         headers = {
2602             'x-youtube-client-name': '1',
2603             'x-youtube-client-version': client_version,
2604             'content-type': 'application/json',
2605         }
2606
2607         context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict) or {
2608             'client': {
2609                 'clientName': 'WEB',
2610                 'clientVersion': client_version,
2611             }
2612         }
2613         visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
2614
2615         identity_token = self._extract_identity_token(ytcfg, webpage)
2616         if identity_token:
2617             headers['x-youtube-identity-token'] = identity_token
2618
2619         data = {
2620             'context': context,
2621         }
2622
2623         for page_num in itertools.count(1):
2624             if not continuation:
2625                 break
2626             if visitor_data:
2627                 headers['x-goog-visitor-id'] = visitor_data
2628             data['continuation'] = continuation['continuation']
2629             data['clickTracking'] = {
2630                 'clickTrackingParams': continuation['itct']
2631             }
2632             count = 0
2633             retries = 3
2634             while count <= retries:
2635                 try:
2636                     # Downloading page may result in intermittent 5xx HTTP error
2637                     # that is usually worked around with a retry
2638                     response = self._download_json(
2639                         'https://www.youtube.com/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
2640                         None, 'Downloading page %d%s' % (page_num, ' (retry #%d)' % count if count else ''),
2641                         headers=headers, data=json.dumps(data).encode('utf8'))
2642                     break
2643                 except ExtractorError as e:
2644                     if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
2645                         count += 1
2646                         if count <= retries:
2647                             continue
2648                     raise
2649             if not response:
2650                 break
2651
2652             visitor_data = try_get(
2653                 response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data
2654
2655             continuation_contents = try_get(
2656                 response, lambda x: x['continuationContents'], dict)
2657             if continuation_contents:
2658                 continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
2659                 if continuation_renderer:
2660                     for entry in self._playlist_entries(continuation_renderer):
2661                         yield entry
2662                     continuation = self._extract_continuation(continuation_renderer)
2663                     continue
2664                 continuation_renderer = continuation_contents.get('gridContinuation')
2665                 if continuation_renderer:
2666                     for entry in self._grid_entries(continuation_renderer):
2667                         yield entry
2668                     continuation = self._extract_continuation(continuation_renderer)
2669                     continue
2670                 continuation_renderer = continuation_contents.get('itemSectionContinuation')
2671                 if continuation_renderer:
2672                     for entry in self._post_thread_continuation_entries(continuation_renderer):
2673                         yield entry
2674                     continuation = self._extract_continuation(continuation_renderer)
2675                     continue
2676
2677             on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
2678             continuation_items = try_get(
2679                 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
2680             if continuation_items:
2681                 continuation_item = continuation_items[0]
2682                 if not isinstance(continuation_item, dict):
2683                     continue
2684                 renderer = self._extract_grid_item_renderer(continuation_item)
2685                 if renderer:
2686                     grid_renderer = {'items': continuation_items}
2687                     for entry in self._grid_entries(grid_renderer):
2688                         yield entry
2689                     continuation = self._extract_continuation(grid_renderer)
2690                     continue
2691                 renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
2692                 if renderer:
2693                     video_list_renderer = {'contents': continuation_items}
2694                     for entry in self._playlist_entries(video_list_renderer):
2695                         yield entry
2696                     continuation = self._extract_continuation(video_list_renderer)
2697                     continue
2698                 renderer = continuation_item.get('backstagePostThreadRenderer')
2699                 if renderer:
2700                     continuation_renderer = {'contents': continuation_items}
2701                     for entry in self._post_thread_continuation_entries(continuation_renderer):
2702                         yield entry
2703                     continuation = self._extract_continuation(continuation_renderer)
2704                     continue
2705                 renderer = continuation_item.get('richItemRenderer')
2706                 if renderer:
2707                     for entry in self._rich_grid_entries(continuation_items):
2708                         yield entry
2709                     continuation = self._extract_continuation({'contents': continuation_items})
2710                     continue
2711
2712             break
2713
2714     @staticmethod
2715     def _extract_selected_tab(tabs):
2716         for tab in tabs:
2717             if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
2718                 return tab['tabRenderer']
2719         else:
2720             raise ExtractorError('Unable to find selected tab')
2721
2722     @staticmethod
2723     def _extract_uploader(data):
2724         uploader = {}
2725         sidebar_renderer = try_get(
2726             data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
2727         if sidebar_renderer:
2728             for item in sidebar_renderer:
2729                 if not isinstance(item, dict):
2730                     continue
2731                 renderer = item.get('playlistSidebarSecondaryInfoRenderer')
2732                 if not isinstance(renderer, dict):
2733                     continue
2734                 owner = try_get(
2735                     renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
2736                 if owner:
2737                     uploader['uploader'] = owner.get('text')
2738                     uploader['uploader_id'] = try_get(
2739                         owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
2740                     uploader['uploader_url'] = urljoin(
2741                         'https://www.youtube.com/',
2742                         try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
2743         return uploader
2744
2745     @staticmethod
2746     def _extract_alert(data):
2747         alerts = []
2748         for alert in try_get(data, lambda x: x['alerts'], list) or []:
2749             if not isinstance(alert, dict):
2750                 continue
2751             alert_text = try_get(
2752                 alert, lambda x: x['alertRenderer']['text'], dict)
2753             if not alert_text:
2754                 continue
2755             text = try_get(
2756                 alert_text,
2757                 (lambda x: x['simpleText'], lambda x: x['runs'][0]['text']),
2758                 compat_str)
2759             if text:
2760                 alerts.append(text)
2761         return '\n'.join(alerts)
2762
2763     def _extract_from_tabs(self, item_id, webpage, data, tabs):
2764         selected_tab = self._extract_selected_tab(tabs)
2765         renderer = try_get(
2766             data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
2767         playlist_id = item_id
2768         title = description = None
2769         if renderer:
2770             channel_title = renderer.get('title') or item_id
2771             tab_title = selected_tab.get('title')
2772             title = channel_title or item_id
2773             if tab_title:
2774                 title += ' - %s' % tab_title
2775             description = renderer.get('description')
2776             playlist_id = renderer.get('externalId')
2777         else:
2778             renderer = try_get(
2779                 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
2780             if renderer:
2781                 title = renderer.get('title')
2782             else:
2783                 renderer = try_get(
2784                     data, lambda x: x['header']['hashtagHeaderRenderer'], dict)
2785                 if renderer:
2786                     title = try_get(renderer, lambda x: x['hashtag']['simpleText'])
2787         playlist = self.playlist_result(
2788             self._entries(selected_tab, item_id, webpage),
2789             playlist_id=playlist_id, playlist_title=title,
2790             playlist_description=description)
2791         playlist.update(self._extract_uploader(data))
2792         return playlist
2793
2794     def _extract_from_playlist(self, item_id, url, data, playlist):
2795         title = playlist.get('title') or try_get(
2796             data, lambda x: x['titleText']['simpleText'], compat_str)
2797         playlist_id = playlist.get('playlistId') or item_id
2798         # Inline playlist rendition continuation does not always work
2799         # at Youtube side, so delegating regular tab-based playlist URL
2800         # processing whenever possible.
2801         playlist_url = urljoin(url, try_get(
2802             playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
2803             compat_str))
2804         if playlist_url and playlist_url != url:
2805             return self.url_result(
2806                 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
2807                 video_title=title)
2808         return self.playlist_result(
2809             self._playlist_entries(playlist), playlist_id=playlist_id,
2810             playlist_title=title)
2811
2812     def _extract_identity_token(self, ytcfg, webpage):
2813         if ytcfg:
2814             token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
2815             if token:
2816                 return token
2817         return self._search_regex(
2818             r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
2819             'identity token', default=None)
2820
2821     def _real_extract(self, url):
2822         item_id = self._match_id(url)
2823         url = compat_urlparse.urlunparse(
2824             compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
2825         # Handle both video/playlist URLs
2826         qs = parse_qs(url)
2827         video_id = qs.get('v', [None])[0]
2828         playlist_id = qs.get('list', [None])[0]
2829         if video_id and playlist_id:
2830             if self._downloader.params.get('noplaylist'):
2831                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2832                 return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
2833             self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2834         webpage = self._download_webpage(url, item_id)
2835         data = self._extract_yt_initial_data(item_id, webpage)
2836         tabs = try_get(
2837             data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
2838         if tabs:
2839             return self._extract_from_tabs(item_id, webpage, data, tabs)
2840         playlist = try_get(
2841             data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
2842         if playlist:
2843             return self._extract_from_playlist(item_id, url, data, playlist)
2844         # Fallback to video extraction if no playlist alike page is recognized.
2845         # First check for the current video then try the v attribute of URL query.
2846         video_id = try_get(
2847             data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
2848             compat_str) or video_id
2849         if video_id:
2850             return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
2851         # Capture and output alerts
2852         alert = self._extract_alert(data)
2853         if alert:
2854             raise ExtractorError(alert, expected=True)
2855         # Failed to recognize
2856         raise ExtractorError('Unable to recognize tab page')
2857
2858
class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist IDs and ``list=`` URLs and delegate to YoutubeTabIE"""
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Tell whether ``url`` should be handled by this extractor.

        URLs claimed by YoutubeTabIE and URLs carrying a ``v`` query
        parameter are rejected; everything else is decided by the regular
        ``_VALID_URL`` based check of the parent class.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        video_id = parse_qs(url).get('v', [None])[0]
        if video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the canonical playlist URL processed by YoutubeTabIE.

        The query of ``url`` is carried over as is; when there is none (for
        instance for a bare playlist ID) a ``list`` parameter is built from
        the matched ID.
        """
        playlist_id = self._match_id(url)
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
2940
2941
class YoutubeYtBeIE(InfoExtractor):
    """Turn youtu.be short links that carry a ``list`` parameter into watch URLs"""
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rebuild the equivalent watch URL and pass it on to YoutubeTabIE.

        The resulting URL keeps both the video and the playlist ID and is
        tagged with ``feature=youtu.be``.
        """
        match = re.match(self._VALID_URL, url)
        video_id, playlist_id = match.group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        return self.url_result(
            update_url_query('https://www.youtube.com/watch', watch_query),
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
2979
2980
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand into a user page URL"""
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the corresponding ``/user/<name>`` page to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
2993
2994
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the favourites page and the ``:ytfav`` shortcuts to the ``LL`` playlist"""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the ``LL`` playlist URL to YoutubeTabIE; ``url`` is not inspected."""
        favourites_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(favourites_url, ie=YoutubeTabIE.ie_key())
3012
3013
3014 class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
3015     IE_DESC = 'YouTube.com searches'
3016     # there doesn't appear to be a real limit, for example if you search for
3017     # 'python' you get more than 8.000.000 results
3018     _MAX_RESULTS = float('inf')
3019     IE_NAME = 'youtube:search'
3020     _SEARCH_KEY = 'ytsearch'
3021     _SEARCH_PARAMS = None
3022     _TESTS = []
3023
3024     def _entries(self, query, n):
3025         data = {
3026             'context': {
3027                 'client': {
3028                     'clientName': 'WEB',
3029                     'clientVersion': '2.20201021.03.00',
3030                 }
3031             },
3032             'query': query,
3033         }
3034         if self._SEARCH_PARAMS:
3035             data['params'] = self._SEARCH_PARAMS
3036         total = 0
3037         for page_num in itertools.count(1):
3038             search = self._download_json(
3039                 'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
3040                 video_id='query "%s"' % query,
3041                 note='Downloading page %s' % page_num,
3042                 errnote='Unable to download API page', fatal=False,
3043                 data=json.dumps(data).encode('utf8'),
3044                 headers={'content-type': 'application/json'})
3045             if not search:
3046                 break
3047             slr_contents = try_get(
3048                 search,
3049                 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
3050                  lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
3051                 list)
3052             if not slr_contents:
3053                 break
3054             for slr_content in slr_contents:
3055                 isr_contents = try_get(