b913d07a63920de2c56570644f7b8175afd5fd8a
[ytdl] / youtube_dl / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5
6 import itertools
7 import json
8 import os.path
9 import random
10 import re
11 import time
12 import traceback
13
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
18     compat_chr,
19     compat_HTTPError,
20     compat_kwargs,
21     compat_parse_qs,
22     compat_urllib_parse_unquote,
23     compat_urllib_parse_unquote_plus,
24     compat_urllib_parse_urlencode,
25     compat_urllib_parse_urlparse,
26     compat_urlparse,
27     compat_str,
28 )
29 from ..utils import (
30     bool_or_none,
31     clean_html,
32     dict_get,
33     error_to_compat_str,
34     extract_attributes,
35     ExtractorError,
36     float_or_none,
37     get_element_by_attribute,
38     get_element_by_id,
39     int_or_none,
40     mimetype2ext,
41     orderedSet,
42     parse_codecs,
43     parse_duration,
44     remove_quotes,
45     remove_start,
46     smuggle_url,
47     str_or_none,
48     str_to_int,
49     try_get,
50     unescapeHTML,
51     unified_strdate,
52     unsmuggle_url,
53     uppercase_escape,
54     url_or_none,
55     urlencode_postdata,
56 )
57
58
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    def _set_language(self):
        """Set the PREF cookie on .youtube.com so that pages are requested with hl=en."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one 'Youtube' url_result per video id in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Always report failure as False, as promised by the docstring
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST the hidden login form fields plus the JSON-encoded f_req
            # payload and return the decoded JSON response. The download is
            # non-fatal; callers check the result against False.
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything that precedes the first '[' of the response
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # The same ServiceLogin URL is embedded in both the lookup and the
        # challenge request payloads.
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Always report failure as False, as promised by the docstring
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Translate the one known status code, then always keep the
            # 'Unable to login' prefix (a one-line conditional expression
            # here would bind looser than '%' and drop the prefix).
            if login_msg == 'INCORRECT_ANSWER_ENTERED':
                login_msg = 'Invalid password'
            warn('Unable to login: %s' % login_msg)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same prefix-preserving handling as for the password step
                    if tfa_msg == 'INCORRECT_ANSWER_ENTERED':
                        tfa_msg = 'Invalid TFA code'
                    warn('Unable to finish TFA: %s' % tfa_msg)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """
        Delegate to the parent implementation with 'disable_polymer=true'
        added to the request's query parameters.

        The caller's query dict is copied, never mutated. A missing or None
        'query' keyword is treated as an empty query.
        """
        query = (kwargs.get('query') or {}).copy()
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt to log in; do nothing without a downloader."""
        if self._downloader is None:
            return
        self._set_language()
        # The result is deliberately ignored: a failed login does not abort
        # initialization.
        self._login()
281
282
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    """Base for extractors that read entries from a page with a "Load more" button"""

    def _entries(self, page, playlist_id):
        """
        Yield the entries of the initial page and of every follow-up page.

        Entries come from self._process_page(). Follow-up pages are fetched
        as JSON from the "load more" link found in the current widget HTML;
        iteration stops when no such link is present or when the fetched
        content is blank.
        """
        max_retries = 3
        widget_html = page
        current_html = page
        for page_num in itertools.count(1):
            for entry in self._process_page(current_html):
                yield entry

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if load_more is None:
                return

            # Downloading page may result in intermittent 5xx HTTP error
            # that is usually worked around with a retry
            for attempt in range(max_retries + 1):
                retry_note = ' (retry #%d)' % attempt if attempt else ''
                try:
                    more = self._download_json(
                        'https://youtube.com/%s' % load_more.group('more'), playlist_id,
                        'Downloading page #%s%s' % (page_num, retry_note),
                        transform_source=uppercase_escape)
                except ExtractorError as err:
                    transient = (
                        isinstance(err.cause, compat_HTTPError)
                        and err.cause.code in (500, 503))
                    # Give up on any other error, or once retries are used up
                    if not transient or attempt >= max_retries:
                        raise
                else:
                    break

            current_html = more['content_html']
            if not current_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            widget_html = more['load_more_widget_html']
320
321
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Base for playlist-like extractors whose pages list individual videos"""

    def _process_page(self, content):
        """Yield a 'Youtube' url_result for each (id, title) pair found in content."""
        for vid, title in self.extract_videos_from_page(content):
            yield self.url_result(vid, 'Youtube', vid, title)

    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
        """
        Scan page with video_re and record the matches in the two given lists.

        ids_in_page and titles_in_page are parallel lists, updated in place.
        A new id is appended together with its title (possibly None). For an
        id already present, a missing title is filled in from a later match.
        """
        for match in re.finditer(video_re, page):
            named = match.groupdict()
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): the comparison reads the 'id' group rather than
            # 'index' - confirm whether that is intended.
            if 'index' in named and match.group('id') == '0':
                continue

            video_id = match.group('id')

            if 'title' in named:
                video_title = unescapeHTML(match.group('title'))
            else:
                video_title = None
            if video_title:
                video_title = video_title.strip()
            if video_title == '► Play all':
                video_title = None

            try:
                position = ids_in_page.index(video_id)
            except ValueError:
                # First occurrence of this id
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
                continue

            if video_title and not titles_in_page[position]:
                titles_in_page[position] = video_title

    def extract_videos_from_page(self, page):
        """Return the zipped (video id, title) pairs matched by self._VIDEO_RE in page."""
        found_ids, found_titles = [], []
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, found_ids, found_titles)
        return zip(found_ids, found_titles)
353
354
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Base for extractors whose pages list playlists rather than videos"""

    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result for each distinct playlist id linked in content."""
        linked_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        for playlist_id in orderedSet(linked_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download the page at url and return a playlist result built from its entries."""
        list_id = self._match_id(url)
        page = self._download_webpage(url, list_id)
        # The title lookup is non-fatal
        page_title = self._og_search_title(page, fatal=False)
        entries = self._entries(page, list_id)
        return self.playlist_result(entries, list_id, page_title)
368
369
370 class YoutubeIE(YoutubeBaseInfoExtractor):
371     IE_DESC = 'YouTube.com'
372     _VALID_URL = r"""(?x)^
373                      (
374                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
375                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
376                             (?:www\.)?deturl\.com/www\.youtube\.com/|
377                             (?:www\.)?pwnyoutube\.com/|
378                             (?:www\.)?hooktube\.com/|
379                             (?:www\.)?yourepeat\.com/|
380                             tube\.majestyc\.net/|
381                             # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
382                             (?:(?:www|dev)\.)?invidio\.us/|
383                             (?:(?:www|no)\.)?invidiou\.sh/|
384                             (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
385                             (?:www\.)?invidious\.kabi\.tk/|
386                             (?:www\.)?invidious\.13ad\.de/|
387                             (?:www\.)?invidious\.mastodon\.host/|
388                             (?:www\.)?invidious\.nixnet\.xyz/|
389                             (?:www\.)?invidious\.drycat\.fr/|
390                             (?:www\.)?tube\.poal\.co/|
391                             (?:www\.)?vid\.wxzm\.sx/|
392                             (?:www\.)?yt\.elukerio\.org/|
393                             (?:www\.)?yt\.lelux\.fi/|
394                             (?:www\.)?kgg2m7yk5aybusll\.onion/|
395                             (?:www\.)?qklhadlycap4cnod\.onion/|
396                             (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
397                             (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
398                             (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
399                             (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
400                             (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
401                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
402                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
403                          (?:                                                  # the various things that can precede the ID:
404                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
405                              |(?:                                             # or the v= param in all its forms
406                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
407                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
408                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
409                                  v=
410                              )
411                          ))
412                          |(?:
413                             youtu\.be|                                        # just youtu.be/xxxx
414                             vid\.plus|                                        # or vid.plus/xxxx
415                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
416                          )/
417                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
418                          )
419                      )?                                                       # all until now is optional -> you can pass the naked ID
420                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
421                      (?!.*?\blist=
422                         (?:
423                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
424                             WL                                                # WL are handled by the watch later IE
425                         )
426                      )
427                      (?(1).+)?                                                # if we found the ID, everything can follow
428                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
429     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
430     _formats = {
431         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
432         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
433         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
434         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
435         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
436         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
437         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
438         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
439         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
440         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
441         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
442         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
443         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
444         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
445         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
446         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
447         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
448         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
449
450
451         # 3D videos
452         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
453         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
454         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
455         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
456         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
457         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
458         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
459
460         # Apple HTTP Live Streaming
461         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
462         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
463         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
464         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
465         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
466         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
467         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
468         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
469
470         # DASH mp4 video
471         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
472         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
473         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
474         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
475         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
476         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
477         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
478         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
479         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
480         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
481         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
482         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
483
484         # Dash mp4 audio
485         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
486         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
487         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
488         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
489         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
490         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
491         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
492
493         # Dash webm
494         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
495         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
496         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
497         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
498         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
499         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
500         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
501         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
502         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
503         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
504         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
505         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
507         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
508         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
509         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
510         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
511         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
512         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
513         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
514         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
515         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
516
517         # Dash webm audio
518         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
519         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
520
521         # Dash webm audio with opus inside
522         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
523         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
524         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
525
526         # RTMP (unnamed)
527         '_rtmp': {'protocol': 'rtmp'},
528
529         # av01 video only formats sometimes served with "unknown" codecs
530         '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
531         '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
532         '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
533         '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
534     }
535     _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
536
537     _GEO_BYPASS = False
538
539     IE_NAME = 'youtube'
    # Test cases for this extractor, one dict per case.  Every case names a
    # 'url'; most add an 'info_dict' with the expected metadata.  Further keys
    # used below: 'note', 'md5', 'params', 'expected_warnings', 'skip',
    # 'playlist', 'playlist_count' and 'only_matching'.
    # NOTE(review): the precise meaning of each key is defined by the test
    # harness, which is not part of this chunk -- confirm there.
    _TESTS = [
        {
            # Watch URL carrying t= and end= query parameters; the expected
            # info_dict lists the matching start_time / end_time values.
            'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
            'info_dict': {
                'id': 'BaW_jenozKc',
                'ext': 'mp4',
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                'uploader': 'Philipp Hagemeister',
                'uploader_id': 'phihag',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
                'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
                'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
                'upload_date': '20121002',
                'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                'categories': ['Science & Technology'],
                'tags': ['youtube-dl'],
                'duration': 10,
                'view_count': int,
                'like_count': int,
                'dislike_count': int,
                'start_time': 1,
                'end_time': 9,
            }
        },
        {
            'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
            'note': 'Test generic use_cipher_signature video (#897)',
            'info_dict': {
                'id': 'UxxajLWwzqY',
                'ext': 'mp4',
                'upload_date': '20120506',
                'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
                'alt_title': 'I Love It (feat. Charli XCX)',
                'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
                'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
                         'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
                         'iconic ep', 'iconic', 'love', 'it'],
                'duration': 180,
                'uploader': 'Icona Pop',
                'uploader_id': 'IconaPop',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
                'creator': 'Icona Pop',
                'track': 'I Love It (feat. Charli XCX)',
                'artist': 'Icona Pop',
            }
        },
        {
            'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
            'note': 'Test VEVO video with age protection (#956)',
            'info_dict': {
                'id': '07FYdnEawAQ',
                'ext': 'mp4',
                'upload_date': '20130703',
                'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
                'alt_title': 'Tunnel Vision',
                'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
                'duration': 419,
                'uploader': 'justintimberlakeVEVO',
                'uploader_id': 'justintimberlakeVEVO',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
                'creator': 'Justin Timberlake',
                'track': 'Tunnel Vision',
                'artist': 'Justin Timberlake',
                'age_limit': 18,
            }
        },
        {
            'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
            'note': 'Embed-only video (#1746)',
            'info_dict': {
                'id': 'yZIXLfi8CZQ',
                'ext': 'mp4',
                'upload_date': '20120608',
                'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
                'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
                'uploader': 'SET India',
                'uploader_id': 'setindia',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
                'age_limit': 18,
            }
        },
        {
            'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
            'note': 'Use the first video ID in the URL',
            'info_dict': {
                'id': 'BaW_jenozKc',
                'ext': 'mp4',
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                'uploader': 'Philipp Hagemeister',
                'uploader_id': 'phihag',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
                'upload_date': '20121002',
                'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                'categories': ['Science & Technology'],
                'tags': ['youtube-dl'],
                'duration': 10,
                'view_count': int,
                'like_count': int,
                'dislike_count': int,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
            'note': '256k DASH audio (format 141) via DASH manifest',
            'info_dict': {
                'id': 'a9LDPn-MO4I',
                'ext': 'm4a',
                'upload_date': '20121002',
                'uploader_id': '8KVIDEO',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
                'description': '',
                'uploader': '8KVIDEO',
                'title': 'UHDTV TEST 8K VIDEO.mp4'
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '141',
            },
            'skip': 'format 141 not served anymore',
        },
        # DASH manifest with encrypted signature
        {
            'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
            'info_dict': {
                'id': 'IB3lcPjvWLA',
                'ext': 'm4a',
                'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
                'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
                'duration': 244,
                'uploader': 'AfrojackVEVO',
                'uploader_id': 'AfrojackVEVO',
                'upload_date': '20131011',
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '141/bestaudio[ext=m4a]',
            },
        },
        # JS player signature function name containing $
        {
            'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
            'info_dict': {
                'id': 'nfWlot6h_JM',
                'ext': 'm4a',
                'title': 'Taylor Swift - Shake It Off',
                'description': 'md5:bec2185232c05479482cb5a9b82719bf',
                'duration': 242,
                'uploader': 'TaylorSwiftVEVO',
                'uploader_id': 'TaylorSwiftVEVO',
                'upload_date': '20140818',
                'creator': 'Taylor Swift',
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '141/bestaudio[ext=m4a]',
            },
        },
        # Controversy video
        {
            'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
            'info_dict': {
                'id': 'T4XJQO3qol8',
                'ext': 'mp4',
                'duration': 219,
                'upload_date': '20100909',
                'uploader': 'Amazing Atheist',
                'uploader_id': 'TheAmazingAtheist',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
                'title': 'Burning Everyone\'s Koran',
                'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
            }
        },
        # Normal age-gate video (No vevo, embed allowed)
        {
            'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
            'info_dict': {
                'id': 'HtVdAasjOgU',
                'ext': 'mp4',
                'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
                'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
                'duration': 142,
                'uploader': 'The Witcher',
                'uploader_id': 'WitcherGame',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
                'upload_date': '20140605',
                'age_limit': 18,
            },
        },
        # Age-gate video with encrypted signature
        {
            'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
            'info_dict': {
                'id': '6kLq3WMV1nU',
                'ext': 'mp4',
                'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
                'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
                'duration': 246,
                'uploader': 'LloydVEVO',
                'uploader_id': 'LloydVEVO',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
                'upload_date': '20110629',
                'age_limit': 18,
            },
        },
        # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
        # YouTube Red ad is not captured for creator
        {
            'url': '__2ABJjxzNo',
            'info_dict': {
                'id': '__2ABJjxzNo',
                'ext': 'mp4',
                'duration': 266,
                'upload_date': '20100430',
                'uploader_id': 'deadmau5',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
                'creator': 'deadmau5',
                'description': 'md5:12c56784b8032162bb936a5f76d55360',
                'uploader': 'deadmau5',
                'title': 'Deadmau5 - Some Chords (HD)',
                'alt_title': 'Some Chords',
            },
            'expected_warnings': [
                'DASH manifest missing',
            ]
        },
        # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
        {
            'url': 'lqQg6PlCWgI',
            'info_dict': {
                'id': 'lqQg6PlCWgI',
                'ext': 'mp4',
                'duration': 6085,
                'upload_date': '20150827',
                'uploader_id': 'olympic',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
                'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
                'uploader': 'Olympic',
                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
            },
            'params': {
                'skip_download': 'requires avconv',
            }
        },
        # Non-square pixels
        {
            'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
            'info_dict': {
                'id': '_b-2C3KPAM0',
                'ext': 'mp4',
                'stretched_ratio': 16 / 9.,
                'duration': 85,
                'upload_date': '20110310',
                'uploader_id': 'AllenMeow',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
                'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
                'uploader': '孫ᄋᄅ',
                'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
            },
        },
        # url_encoded_fmt_stream_map is empty string
        {
            'url': 'qEJwOuvDf7I',
            'info_dict': {
                'id': 'qEJwOuvDf7I',
                'ext': 'webm',
                'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
                'description': '',
                'upload_date': '20150404',
                'uploader_id': 'spbelect',
                'uploader': 'Наблюдатели Петербурга',
            },
            'params': {
                'skip_download': 'requires avconv',
            },
            'skip': 'This live event has ended.',
        },
        # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
        {
            'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
            'info_dict': {
                'id': 'FIl7x6_3R5Y',
                'ext': 'webm',
                'title': 'md5:7b81415841e02ecd4313668cde88737a',
                'description': 'md5:116377fd2963b81ec4ce64b542173306',
                'duration': 220,
                'upload_date': '20150625',
                'uploader_id': 'dorappi2000',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
                'uploader': 'dorappi2000',
                'formats': 'mincount:31',
            },
            'skip': 'not actual anymore',
        },
        # DASH manifest with segment_list
        {
            'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
            'md5': '8ce563a1d667b599d21064e982ab9e31',
            'info_dict': {
                'id': 'CsmdDsKjzN8',
                'ext': 'mp4',
                'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
                'uploader': 'Airtek',
                'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
                'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
                'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '135',  # bestvideo
            },
            'skip': 'This live event has ended.',
        },
        {
            # Multifeed videos (multiple cameras), URL is for Main Camera
            'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
            'info_dict': {
                'id': 'jqWvoWXjCVs',
                'title': 'teamPGP: Rocket League Noob Stream',
                'description': 'md5:dc7872fb300e143831327f1bae3af010',
            },
            'playlist': [{
                'info_dict': {
                    'id': 'jqWvoWXjCVs',
                    'ext': 'mp4',
                    'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
                    'duration': 7335,
                    'upload_date': '20150721',
                    'uploader': 'Beer Games Beer',
                    'uploader_id': 'beergamesbeer',
                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
                    'license': 'Standard YouTube License',
                },
            }, {
                'info_dict': {
                    'id': '6h8e8xoXJzg',
                    'ext': 'mp4',
                    'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
                    'duration': 7337,
                    'upload_date': '20150721',
                    'uploader': 'Beer Games Beer',
                    'uploader_id': 'beergamesbeer',
                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
                    'license': 'Standard YouTube License',
                },
            }, {
                'info_dict': {
                    'id': 'PUOgX5z9xZw',
                    'ext': 'mp4',
                    'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
                    'duration': 7337,
                    'upload_date': '20150721',
                    'uploader': 'Beer Games Beer',
                    'uploader_id': 'beergamesbeer',
                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
                    'license': 'Standard YouTube License',
                },
            }, {
                'info_dict': {
                    'id': 'teuwxikvS5k',
                    'ext': 'mp4',
                    'title': 'teamPGP: Rocket League Noob Stream (zim)',
                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
                    'duration': 7334,
                    'upload_date': '20150721',
                    'uploader': 'Beer Games Beer',
                    'uploader_id': 'beergamesbeer',
                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
                    'license': 'Standard YouTube License',
                },
            }],
            'params': {
                'skip_download': True,
            },
            'skip': 'This video is not available.',
        },
        {
            # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
            'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
            'info_dict': {
                'id': 'gVfLd0zydlo',
                'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
            },
            'playlist_count': 2,
            'skip': 'Not multifeed anymore',
        },
        {
            # vid.plus URL
            'url': 'https://vid.plus/FlRa-iH7PGw',
            'only_matching': True,
        },
        {
            # zwearz.com watch URL
            'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
            'only_matching': True,
        },
        {
            # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
            # Also tests cut-off URL expansion in video description (see
            # https://github.com/ytdl-org/youtube-dl/issues/1892,
            # https://github.com/ytdl-org/youtube-dl/issues/8164)
            'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
            'info_dict': {
                'id': 'lsguqyKfVQg',
                'ext': 'mp4',
                'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
                'alt_title': 'Dark Walk - Position Music',
                'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
                'duration': 133,
                'upload_date': '20151119',
                'uploader_id': 'IronSoulElf',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
                'uploader': 'IronSoulElf',
                'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
                'track': 'Dark Walk - Position Music',
                'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
                'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
            'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
            'only_matching': True,
        },
        {
            # Video with yt:stretch=17:0
            'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
            'info_dict': {
                'id': 'Q39EVAstoRM',
                'ext': 'mp4',
                'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
                'description': 'md5:ee18a25c350637c8faff806845bddee9',
                'upload_date': '20151107',
                'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
                'uploader': 'CH GAMER DROID',
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'This video does not exist.',
        },
        {
            # Video licensed under Creative Commons
            'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
            'info_dict': {
                'id': 'M4gD1WSo5mA',
                'ext': 'mp4',
                'title': 'md5:e41008789470fc2533a3252216f1c1d1',
                'description': 'md5:a677553cf0840649b731a3024aeff4cc',
                'duration': 721,
                'upload_date': '20150127',
                'uploader_id': 'BerkmanCenter',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
                'uploader': 'The Berkman Klein Center for Internet & Society',
                'license': 'Creative Commons Attribution license (reuse allowed)',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # Channel-like uploader_url
            'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
            'info_dict': {
                'id': 'eQcmzGIKrzg',
                'ext': 'mp4',
                'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
                'description': 'md5:dda0d780d5a6e120758d1711d062a867',
                'duration': 4060,
                'upload_date': '20151119',
                'uploader': 'Bernie Sanders',
                'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
                'license': 'Creative Commons Attribution license (reuse allowed)',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # Query string whose '&' separator is HTML-escaped twice ('&amp;amp;')
            'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
            'only_matching': True,
        },
        {
            # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
            'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
            'only_matching': True,
        },
        {
            # Rental video preview
            'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
            'info_dict': {
                'id': 'uGpuVWrhIzE',
                'ext': 'mp4',
                'title': 'Piku - Trailer',
                'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
                'upload_date': '20150811',
                'uploader': 'FlixMatrix',
                'uploader_id': 'FlixMatrixKaravan',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
                'license': 'Standard YouTube License',
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'This video is not available.',
        },
        {
            # YouTube Red video with episode data
            'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
            'info_dict': {
                'id': 'iqKdEhx-dD4',
                'ext': 'mp4',
                'title': 'Isolation - Mind Field (Ep 1)',
                'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
                'duration': 2085,
                'upload_date': '20170118',
                'uploader': 'Vsauce',
                'uploader_id': 'Vsauce',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
                'series': 'Mind Field',
                'season_number': 1,
                'episode_number': 1,
            },
            'params': {
                'skip_download': True,
            },
            'expected_warnings': [
                'Skipping DASH manifest',
            ],
        },
        {
            # The following content has been identified by the YouTube community
            # as inappropriate or offensive to some audiences.
            'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
            'info_dict': {
                'id': '6SJNVb0GnPI',
                'ext': 'mp4',
                'title': 'Race Differences in Intelligence',
                'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
                'duration': 965,
                'upload_date': '20140124',
                'uploader': 'New Century Foundation',
                'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # itag 212
            'url': '1t24XAntNCY',
            'only_matching': True,
        },
        {
            # geo restricted to JP
            'url': 'sJL6WA-aGkQ',
            'only_matching': True,
        },
        {
            # Watch URL with an additional list=RDMM parameter
            'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
            'only_matching': True,
        },
        {
            # invidio.us watch URL
            'url': 'https://invidio.us/watch?v=BaW_jenozKc',
            'only_matching': True,
        },
        {
            # DRM protected
            'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
            'only_matching': True,
        },
        {
            # Video with unsupported adaptive stream type formats
            'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
            'info_dict': {
                'id': 'Z4Vy8R84T1U',
                'ext': 'mp4',
                'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
                'duration': 433,
                'upload_date': '20130923',
                'uploader': 'Amelia Putri Harwita',
                'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
                'formats': 'maxcount:10',
            },
            'params': {
                'skip_download': True,
                'youtube_include_dash_manifest': False,
            },
        },
        {
            # YouTube Music auto-generated description
            'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
            'info_dict': {
                'id': 'MgNrAu2pzNs',
                'ext': 'mp4',
                'title': 'Voyeur Girl',
                'description': 'md5:7ae382a65843d6df2685993e90a8628f',
                'upload_date': '20190312',
                'uploader': 'Various Artists - Topic',
                'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
                'artist': 'Stephen',
                'track': 'Voyeur Girl',
                'album': 'it\'s too much love to know my dear',
                'release_date': '20190313',
                'release_year': 2019,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # YouTube Music auto-generated description:
            # retrieve the 'artist' field from 'Artist:' in the video
            # description when it is present on a YouTube Music video
            'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
            'info_dict': {
                'id': 'k0jLE7tTwjY',
                'ext': 'mp4',
                'title': 'Latch Feat. Sam Smith',
                'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
                'upload_date': '20150110',
                'uploader': 'Various Artists - Topic',
                'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
                'artist': 'Disclosure',
                'track': 'Latch Feat. Sam Smith',
                'album': 'Latch Featuring Sam Smith',
                'release_date': '20121008',
                'release_year': 2012,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # YouTube Music auto-generated description:
            # handle multiple artists on a YouTube Music video
            'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
            'info_dict': {
                'id': '74qn0eJSjpA',
                'ext': 'mp4',
                'title': 'Eastside',
                'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
                'upload_date': '20180710',
                'uploader': 'Benny Blanco - Topic',
                'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
                'artist': 'benny blanco, Halsey, Khalid',
                'track': 'Eastside',
                'album': 'Eastside',
                'release_date': '20180713',
                'release_year': 2018,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # YouTube Music auto-generated description:
            # handle a YouTube Music video with release_year and no release_date
            'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
            'info_dict': {
                'id': '-hcAI0g-f5M',
                'ext': 'mp4',
                'title': 'Put It On Me',
                'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
                'upload_date': '20180426',
                'uploader': 'Matt Maeson - Topic',
                'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
                'artist': 'Matt Maeson',
                'track': 'Put It On Me',
                'album': 'The Hearse',
                'release_date': None,
                'release_year': 2018,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # youtubekids.com watch URL
            'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
            'only_matching': True,
        },
    ]
1232
1233     def __init__(self, *args, **kwargs):
1234         super(YoutubeIE, self).__init__(*args, **kwargs)
1235         self._player_cache = {}
1236
1237     def report_video_info_webpage_download(self, video_id):
1238         """Report attempt to download video info webpage."""
1239         self.to_screen('%s: Downloading video info webpage' % video_id)
1240
1241     def report_information_extraction(self, video_id):
1242         """Report attempt to extract video information."""
1243         self.to_screen('%s: Extracting video information' % video_id)
1244
1245     def report_unavailable_format(self, video_id, format):
1246         """Report extracted video URL."""
1247         self.to_screen('%s: Format %s not available' % (video_id, format))
1248
1249     def report_rtmp_download(self):
1250         """Indicate the download will use the RTMP protocol."""
1251         self.to_screen('RTMP download detected')
1252
1253     def _signature_cache_id(self, example_sig):
1254         """ Return a string representation of a signature """
1255         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1256
1257     def _extract_signature_function(self, video_id, player_url, example_sig):
1258         id_m = re.match(
1259             r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
1260             player_url)
1261         if not id_m:
1262             raise ExtractorError('Cannot identify player %r' % player_url)
1263         player_type = id_m.group('ext')
1264         player_id = id_m.group('id')
1265
1266         # Read from filesystem cache
1267         func_id = '%s_%s_%s' % (
1268             player_type, player_id, self._signature_cache_id(example_sig))
1269         assert os.path.basename(func_id) == func_id
1270
1271         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1272         if cache_spec is not None:
1273             return lambda s: ''.join(s[i] for i in cache_spec)
1274
1275         download_note = (
1276             'Downloading player %s' % player_url
1277             if self._downloader.params.get('verbose') else
1278             'Downloading %s player %s' % (player_type, player_id)
1279         )
1280         if player_type == 'js':
1281             code = self._download_webpage(
1282                 player_url, video_id,
1283                 note=download_note,
1284                 errnote='Download of %s failed' % player_url)
1285             res = self._parse_sig_js(code)
1286         elif player_type == 'swf':
1287             urlh = self._request_webpage(
1288                 player_url, video_id,
1289                 note=download_note,
1290                 errnote='Download of %s failed' % player_url)
1291             code = urlh.read()
1292             res = self._parse_sig_swf(code)
1293         else:
1294             assert False, 'Invalid player type %r' % player_type
1295
1296         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1297         cache_res = res(test_string)
1298         cache_spec = [ord(c) for c in cache_res]
1299
1300         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1301         return res
1302
1303     def _print_sig_code(self, func, example_sig):
1304         def gen_sig_code(idxs):
1305             def _genslice(start, end, step):
1306                 starts = '' if start == 0 else str(start)
1307                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1308                 steps = '' if step == 1 else (':%d' % step)
1309                 return 's[%s%s%s]' % (starts, ends, steps)
1310
1311             step = None
1312             # Quelch pyflakes warnings - start will be set when step is set
1313             start = '(Never used)'
1314             for i, prev in zip(idxs[1:], idxs[:-1]):
1315                 if step is not None:
1316                     if i - prev == step:
1317                         continue
1318                     yield _genslice(start, prev, step)
1319                     step = None
1320                     continue
1321                 if i - prev in [-1, 1]:
1322                     step = i - prev
1323                     start = prev
1324                     continue
1325                 else:
1326                     yield 's[%d]' % prev
1327             if step is None:
1328                 yield 's[%d]' % i
1329             else:
1330                 yield _genslice(start, i, step)
1331
1332         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1333         cache_res = func(test_string)
1334         cache_spec = [ord(c) for c in cache_res]
1335         expr_code = ' + '.join(gen_sig_code(cache_spec))
1336         signature_id_tuple = '(%s)' % (
1337             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1338         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1339                 '    return %s\n') % (signature_id_tuple, expr_code)
1340         self.to_screen('Extracted signature function:\n' + code)
1341
1342     def _parse_sig_js(self, jscode):
1343         funcname = self._search_regex(
1344             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1345              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1346              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1347              # Obsolete patterns
1348              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1349              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1350              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1351              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1352              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1353              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1354              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1355              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1356             jscode, 'Initial JS player signature function name', group='sig')
1357
1358         jsi = JSInterpreter(jscode)
1359         initial_function = jsi.extract_function(funcname)
1360         return lambda s: initial_function([s])
1361
1362     def _parse_sig_swf(self, file_contents):
1363         swfi = SWFInterpreter(file_contents)
1364         TARGET_CLASSNAME = 'SignatureDecipher'
1365         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1366         initial_function = swfi.extract_function(searched_class, 'decipher')
1367         return lambda s: initial_function([s])
1368
1369     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1370         """Turn the encrypted s field into a working signature"""
1371
1372         if player_url is None:
1373             raise ExtractorError('Cannot decrypt signature without player_url')
1374
1375         if player_url.startswith('//'):
1376             player_url = 'https:' + player_url
1377         elif not re.match(r'https?://', player_url):
1378             player_url = compat_urlparse.urljoin(
1379                 'https://www.youtube.com', player_url)
1380         try:
1381             player_id = (player_url, self._signature_cache_id(s))
1382             if player_id not in self._player_cache:
1383                 func = self._extract_signature_function(
1384                     video_id, player_url, s
1385                 )
1386                 self._player_cache[player_id] = func
1387             func = self._player_cache[player_id]
1388             if self._downloader.params.get('youtube_print_sig_code'):
1389                 self._print_sig_code(func, s)
1390             return func(s)
1391         except Exception as e:
1392             tb = traceback.format_exc()
1393             raise ExtractorError(
1394                 'Signature extraction failed: ' + tb, cause=e)
1395
1396     def _get_subtitles(self, video_id, webpage):
1397         try:
1398             subs_doc = self._download_xml(
1399                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1400                 video_id, note=False)
1401         except ExtractorError as err:
1402             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1403             return {}
1404
1405         sub_lang_list = {}
1406         for track in subs_doc.findall('track'):
1407             lang = track.attrib['lang_code']
1408             if lang in sub_lang_list:
1409                 continue
1410             sub_formats = []
1411             for ext in self._SUBTITLE_FORMATS:
1412                 params = compat_urllib_parse_urlencode({
1413                     'lang': lang,
1414                     'v': video_id,
1415                     'fmt': ext,
1416                     'name': track.attrib['name'].encode('utf-8'),
1417                 })
1418                 sub_formats.append({
1419                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1420                     'ext': ext,
1421                 })
1422             sub_lang_list[lang] = sub_formats
1423         if not sub_lang_list:
1424             self._downloader.report_warning('video doesn\'t have subtitles')
1425             return {}
1426         return sub_lang_list
1427
1428     def _get_ytplayer_config(self, video_id, webpage):
1429         patterns = (
1430             # User data may contain arbitrary character sequences that may affect
1431             # JSON extraction with regex, e.g. when '};' is contained the second
1432             # regex won't capture the whole JSON. Yet working around by trying more
1433             # concrete regex first keeping in mind proper quoted string handling
1434             # to be implemented in future that will replace this workaround (see
1435             # https://github.com/ytdl-org/youtube-dl/issues/7468,
1436             # https://github.com/ytdl-org/youtube-dl/pull/7599)
1437             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1438             r';ytplayer\.config\s*=\s*({.+?});',
1439         )
1440         config = self._search_regex(
1441             patterns, webpage, 'ytplayer.config', default=None)
1442         if config:
1443             return self._parse_json(
1444                 uppercase_escape(config), video_id, fatal=False)
1445
1446     def _get_automatic_captions(self, video_id, webpage):
1447         """We need the webpage for getting the captions url, pass it as an
1448            argument to speed up the process."""
1449         self.to_screen('%s: Looking for automatic captions' % video_id)
1450         player_config = self._get_ytplayer_config(video_id, webpage)
1451         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1452         if not player_config:
1453             self._downloader.report_warning(err_msg)
1454             return {}
1455         try:
1456             args = player_config['args']
1457             caption_url = args.get('ttsurl')
1458             if caption_url:
1459                 timestamp = args['timestamp']
1460                 # We get the available subtitles
1461                 list_params = compat_urllib_parse_urlencode({
1462                     'type': 'list',
1463                     'tlangs': 1,
1464                     'asrs': 1,
1465                 })
1466                 list_url = caption_url + '&' + list_params
1467                 caption_list = self._download_xml(list_url, video_id)
1468                 original_lang_node = caption_list.find('track')
1469                 if original_lang_node is None:
1470                     self._downloader.report_warning('Video doesn\'t have automatic captions')
1471                     return {}
1472                 original_lang = original_lang_node.attrib['lang_code']
1473                 caption_kind = original_lang_node.attrib.get('kind', '')
1474
1475                 sub_lang_list = {}
1476                 for lang_node in caption_list.findall('target'):
1477                     sub_lang = lang_node.attrib['lang_code']
1478                     sub_formats = []
1479                     for ext in self._SUBTITLE_FORMATS:
1480                         params = compat_urllib_parse_urlencode({
1481                             'lang': original_lang,
1482                             'tlang': sub_lang,
1483                             'fmt': ext,
1484                             'ts': timestamp,
1485                             'kind': caption_kind,
1486                         })
1487                         sub_formats.append({
1488                             'url': caption_url + '&' + params,
1489                             'ext': ext,
1490                         })
1491                     sub_lang_list[sub_lang] = sub_formats
1492                 return sub_lang_list
1493
1494             def make_captions(sub_url, sub_langs):
1495                 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1496                 caption_qs = compat_parse_qs(parsed_sub_url.query)
1497                 captions = {}
1498                 for sub_lang in sub_langs:
1499                     sub_formats = []
1500                     for ext in self._SUBTITLE_FORMATS:
1501                         caption_qs.update({
1502                             'tlang': [sub_lang],
1503                             'fmt': [ext],
1504                         })
1505                         sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1506                             query=compat_urllib_parse_urlencode(caption_qs, True)))
1507                         sub_formats.append({
1508                             'url': sub_url,
1509                             'ext': ext,
1510                         })
1511                     captions[sub_lang] = sub_formats
1512                 return captions
1513
1514             # New captions format as of 22.06.2017
1515             player_response = args.get('player_response')
1516             if player_response and isinstance(player_response, compat_str):
1517                 player_response = self._parse_json(
1518                     player_response, video_id, fatal=False)
1519                 if player_response:
1520                     renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1521                     base_url = renderer['captionTracks'][0]['baseUrl']
1522                     sub_lang_list = []
1523                     for lang in renderer['translationLanguages']:
1524                         lang_code = lang.get('languageCode')
1525                         if lang_code:
1526                             sub_lang_list.append(lang_code)
1527                     return make_captions(base_url, sub_lang_list)
1528
1529             # Some videos don't provide ttsurl but rather caption_tracks and
1530             # caption_translation_languages (e.g. 20LmZk1hakA)
1531             # Does not used anymore as of 22.06.2017
1532             caption_tracks = args['caption_tracks']
1533             caption_translation_languages = args['caption_translation_languages']
1534             caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1535             sub_lang_list = []
1536             for lang in caption_translation_languages.split(','):
1537                 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1538                 sub_lang = lang_qs.get('lc', [None])[0]
1539                 if sub_lang:
1540                     sub_lang_list.append(sub_lang)
1541             return make_captions(caption_url, sub_lang_list)
1542         # An extractor error can be raise by the download process if there are
1543         # no automatic captions but there are subtitles
1544         except (KeyError, IndexError, ExtractorError):
1545             self._downloader.report_warning(err_msg)
1546             return {}
1547
1548     def _mark_watched(self, video_id, video_info, player_response):
1549         playback_url = url_or_none(try_get(
1550             player_response,
1551             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1552             video_info, lambda x: x['videostats_playback_base_url'][0]))
1553         if not playback_url:
1554             return
1555         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1556         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1557
1558         # cpn generation algorithm is reverse engineered from base.js.
1559         # In fact it works even with dummy cpn.
1560         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1561         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1562
1563         qs.update({
1564             'ver': ['2'],
1565             'cpn': [cpn],
1566         })
1567         playback_url = compat_urlparse.urlunparse(
1568             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1569
1570         self._download_webpage(
1571             playback_url, video_id, 'Marking watched',
1572             'Unable to mark watched', fatal=False)
1573
1574     @staticmethod
1575     def _extract_urls(webpage):
1576         # Embedded YouTube player
1577         entries = [
1578             unescapeHTML(mobj.group('url'))
1579             for mobj in re.finditer(r'''(?x)
1580             (?:
1581                 <iframe[^>]+?src=|
1582                 data-video-url=|
1583                 <embed[^>]+?src=|
1584                 embedSWF\(?:\s*|
1585                 <object[^>]+data=|
1586                 new\s+SWFObject\(
1587             )
1588             (["\'])
1589                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1590                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1591             \1''', webpage)]
1592
1593         # lazyYT YouTube embed
1594         entries.extend(list(map(
1595             unescapeHTML,
1596             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1597
1598         # Wordpress "YouTube Video Importer" plugin
1599         matches = re.findall(r'''(?x)<div[^>]+
1600             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1601             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1602         entries.extend(m[-1] for m in matches)
1603
1604         return entries
1605
1606     @staticmethod
1607     def _extract_url(webpage):
1608         urls = YoutubeIE._extract_urls(webpage)
1609         return urls[0] if urls else None
1610
1611     @classmethod
1612     def extract_id(cls, url):
1613         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1614         if mobj is None:
1615             raise ExtractorError('Invalid URL: %s' % url)
1616         video_id = mobj.group(2)
1617         return video_id
1618
1619     @staticmethod
1620     def _extract_chapters(description, duration):
1621         if not description:
1622             return None
1623         chapter_lines = re.findall(
1624             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1625             description)
1626         if not chapter_lines:
1627             return None
1628         chapters = []
1629         for next_num, (chapter_line, time_point) in enumerate(
1630                 chapter_lines, start=1):
1631             start_time = parse_duration(time_point)
1632             if start_time is None:
1633                 continue
1634             if start_time > duration:
1635                 break
1636             end_time = (duration if next_num == len(chapter_lines)
1637                         else parse_duration(chapter_lines[next_num][1]))
1638             if end_time is None:
1639                 continue
1640             if end_time > duration:
1641                 end_time = duration
1642             if start_time > end_time:
1643                 break
1644             chapter_title = re.sub(
1645                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1646             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1647             chapters.append({
1648                 'start_time': start_time,
1649                 'end_time': end_time,
1650                 'title': chapter_title,
1651             })
1652         return chapters
1653
1654     def _real_extract(self, url):
1655         url, smuggled_data = unsmuggle_url(url, {})
1656
1657         proto = (
1658             'http' if self._downloader.params.get('prefer_insecure', False)
1659             else 'https')
1660
1661         start_time = None
1662         end_time = None
1663         parsed_url = compat_urllib_parse_urlparse(url)
1664         for component in [parsed_url.fragment, parsed_url.query]:
1665             query = compat_parse_qs(component)
1666             if start_time is None and 't' in query:
1667                 start_time = parse_duration(query['t'][0])
1668             if start_time is None and 'start' in query:
1669                 start_time = parse_duration(query['start'][0])
1670             if end_time is None and 'end' in query:
1671                 end_time = parse_duration(query['end'][0])
1672
1673         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1674         mobj = re.search(self._NEXT_URL_RE, url)
1675         if mobj:
1676             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1677         video_id = self.extract_id(url)
1678
1679         # Get video webpage
1680         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1681         video_webpage = self._download_webpage(url, video_id)
1682
1683         # Attempt to extract SWF player URL
1684         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1685         if mobj is not None:
1686             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1687         else:
1688             player_url = None
1689
1690         dash_mpds = []
1691
1692         def add_dash_mpd(video_info):
1693             dash_mpd = video_info.get('dashmpd')
1694             if dash_mpd and dash_mpd[0] not in dash_mpds:
1695                 dash_mpds.append(dash_mpd[0])
1696
1697         def add_dash_mpd_pr(pl_response):
1698             dash_mpd = url_or_none(try_get(
1699                 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1700                 compat_str))
1701             if dash_mpd and dash_mpd not in dash_mpds:
1702                 dash_mpds.append(dash_mpd)
1703
1704         is_live = None
1705         view_count = None
1706
1707         def extract_view_count(v_info):
1708             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1709
1710         def extract_token(v_info):
1711             return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
1712
1713         def extract_player_response(player_response, video_id):
1714             pl_response = str_or_none(player_response)
1715             if not pl_response:
1716                 return
1717             pl_response = self._parse_json(pl_response, video_id, fatal=False)
1718             if isinstance(pl_response, dict):
1719                 add_dash_mpd_pr(pl_response)
1720                 return pl_response
1721
1722         player_response = {}
1723
1724         # Get video info
1725         embed_webpage = None
1726         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1727             age_gate = True
1728             # We simulate the access to the video from www.youtube.com/v/{video_id}
1729             # this can be viewed without login into Youtube
1730             url = proto + '://www.youtube.com/embed/%s' % video_id
1731             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1732             data = compat_urllib_parse_urlencode({
1733                 'video_id': video_id,
1734                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1735                 'sts': self._search_regex(
1736                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1737             })
1738             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1739             video_info_webpage = self._download_webpage(
1740                 video_info_url, video_id,
1741                 note='Refetching age-gated info webpage',
1742                 errnote='unable to download video info webpage')
1743             video_info = compat_parse_qs(video_info_webpage)
1744             pl_response = video_info.get('player_response', [None])[0]
1745             player_response = extract_player_response(pl_response, video_id)
1746             add_dash_mpd(video_info)
1747             view_count = extract_view_count(video_info)
1748         else:
1749             age_gate = False
1750             video_info = None
1751             sts = None
1752             # Try looking directly into the video webpage
1753             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1754             if ytplayer_config:
1755                 args = ytplayer_config['args']
1756                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1757                     # Convert to the same format returned by compat_parse_qs
1758                     video_info = dict((k, [v]) for k, v in args.items())
1759                     add_dash_mpd(video_info)
1760                 # Rental video is not rented but preview is available (e.g.
1761                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1762                 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1763                 if not video_info and args.get('ypc_vid'):
1764                     return self.url_result(
1765                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1766                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1767                     is_live = True
1768                 sts = ytplayer_config.get('sts')
1769                 if not player_response:
1770                     player_response = extract_player_response(args.get('player_response'), video_id)
1771             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1772                 add_dash_mpd_pr(player_response)
1773                 # We also try looking in get_video_info since it may contain different dashmpd
1774                 # URL that points to a DASH manifest with possibly different itag set (some itags
1775                 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1776                 # manifest pointed by get_video_info's dashmpd).
1777                 # The general idea is to take a union of itags of both DASH manifests (for example
1778                 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
1779                 self.report_video_info_webpage_download(video_id)
1780                 for el in ('embedded', 'detailpage', 'vevo', ''):
1781                     query = {
1782                         'video_id': video_id,
1783                         'ps': 'default',
1784                         'eurl': '',
1785                         'gl': 'US',
1786                         'hl': 'en',
1787                     }
1788                     if el:
1789                         query['el'] = el
1790                     if sts:
1791                         query['sts'] = sts
1792                     video_info_webpage = self._download_webpage(
1793                         '%s://www.youtube.com/get_video_info' % proto,
1794                         video_id, note=False,
1795                         errnote='unable to download video info webpage',
1796                         fatal=False, query=query)
1797                     if not video_info_webpage:
1798                         continue
1799                     get_video_info = compat_parse_qs(video_info_webpage)
1800                     if not player_response:
1801                         pl_response = get_video_info.get('player_response', [None])[0]
1802                         player_response = extract_player_response(pl_response, video_id)
1803                     add_dash_mpd(get_video_info)
1804                     if view_count is None:
1805                         view_count = extract_view_count(get_video_info)
1806                     if not video_info:
1807                         video_info = get_video_info
1808                     get_token = extract_token(get_video_info)
1809                     if get_token:
1810                         # Different get_video_info requests may report different results, e.g.
1811                         # some may report video unavailability, but some may serve it without
1812                         # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
1813                         # the original webpage as well as el=info and el=embedded get_video_info
1814                         # requests report video unavailability due to geo restriction while
1815                         # el=detailpage succeeds and returns valid data). This is probably
1816                         # due to YouTube measures against IP ranges of hosting providers.
1817                         # Working around by preferring the first succeeded video_info containing
1818                         # the token if no such video_info yet was found.
1819                         token = extract_token(video_info)
1820                         if not token:
1821                             video_info = get_video_info
1822                         break
1823
1824         def extract_unavailable_message():
1825             messages = []
1826             for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1827                 msg = self._html_search_regex(
1828                     r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1829                     video_webpage, 'unavailable %s' % kind, default=None)
1830                 if msg:
1831                     messages.append(msg)
1832             if messages:
1833                 return '\n'.join(messages)
1834
1835         if not video_info:
1836             unavailable_message = extract_unavailable_message()
1837             if not unavailable_message:
1838                 unavailable_message = 'Unable to extract video data'
1839             raise ExtractorError(
1840                 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1841
1842         video_details = try_get(
1843             player_response, lambda x: x['videoDetails'], dict) or {}
1844
1845         video_title = video_info.get('title', [None])[0] or video_details.get('title')
1846         if not video_title:
1847             self._downloader.report_warning('Unable to extract video title')
1848             video_title = '_'
1849
1850         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1851         if video_description:
1852
1853             def replace_url(m):
1854                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1855                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1856                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1857                     qs = compat_parse_qs(parsed_redir_url.query)
1858                     q = qs.get('q')
1859                     if q and q[0]:
1860                         return q[0]
1861                 return redir_url
1862
1863             description_original = video_description = re.sub(r'''(?x)
1864                 <a\s+
1865                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1866                     (?:title|href)="([^"]+)"\s+
1867                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1868                     class="[^"]*"[^>]*>
1869                 [^<]+\.{3}\s*
1870                 </a>
1871             ''', replace_url, video_description)
1872             video_description = clean_html(video_description)
1873         else:
1874             video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
1875
1876         if not smuggled_data.get('force_singlefeed', False):
1877             if not self._downloader.params.get('noplaylist'):
1878                 multifeed_metadata_list = try_get(
1879                     player_response,
1880                     lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1881                     compat_str) or try_get(
1882                     video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1883                 if multifeed_metadata_list:
1884                     entries = []
1885                     feed_ids = []
1886                     for feed in multifeed_metadata_list.split(','):
1887                         # Unquote should take place before split on comma (,) since textual
1888                         # fields may contain comma as well (see
1889                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
1890                         feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1891                         entries.append({
1892                             '_type': 'url_transparent',
1893                             'ie_key': 'Youtube',
1894                             'url': smuggle_url(
1895                                 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1896                                 {'force_singlefeed': True}),
1897                             'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1898                         })
1899                         feed_ids.append(feed_data['id'][0])
1900                     self.to_screen(
1901                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1902                         % (', '.join(feed_ids), video_id))
1903                     return self.playlist_result(entries, video_id, video_title, video_description)
1904             else:
1905                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1906
1907         if view_count is None:
1908             view_count = extract_view_count(video_info)
1909         if view_count is None and video_details:
1910             view_count = int_or_none(video_details.get('viewCount'))
1911
1912         if is_live is None:
1913             is_live = bool_or_none(video_details.get('isLive'))
1914
1915         # Check for "rental" videos
1916         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1917             raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1918
1919         def _extract_filesize(media_url):
1920             return int_or_none(self._search_regex(
1921                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1922
1923         streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1924         streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1925
1926         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1927             self.report_rtmp_download()
1928             formats = [{
1929                 'format_id': '_rtmp',
1930                 'protocol': 'rtmp',
1931                 'url': video_info['conn'][0],
1932                 'player_url': player_url,
1933             }]
1934         elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1935             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1936             if 'rtmpe%3Dyes' in encoded_url_map:
1937                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1938             formats = []
1939             formats_spec = {}
1940             fmt_list = video_info.get('fmt_list', [''])[0]
1941             if fmt_list:
1942                 for fmt in fmt_list.split(','):
1943                     spec = fmt.split('/')
1944                     if len(spec) > 1:
1945                         width_height = spec[1].split('x')
1946                         if len(width_height) == 2:
1947                             formats_spec[spec[0]] = {
1948                                 'resolution': spec[1],
1949                                 'width': int_or_none(width_height[0]),
1950                                 'height': int_or_none(width_height[1]),
1951                             }
1952             for fmt in streaming_formats:
1953                 itag = str_or_none(fmt.get('itag'))
1954                 if not itag:
1955                     continue
1956                 quality = fmt.get('quality')
1957                 quality_label = fmt.get('qualityLabel') or quality
1958                 formats_spec[itag] = {
1959                     'asr': int_or_none(fmt.get('audioSampleRate')),
1960                     'filesize': int_or_none(fmt.get('contentLength')),
1961                     'format_note': quality_label,
1962                     'fps': int_or_none(fmt.get('fps')),
1963                     'height': int_or_none(fmt.get('height')),
1964                     # bitrate for itag 43 is always 2147483647
1965                     'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1966                     'width': int_or_none(fmt.get('width')),
1967                 }
1968
1969             for fmt in streaming_formats:
1970                 if fmt.get('drm_families'):
1971                     continue
1972                 url = url_or_none(fmt.get('url'))
1973
1974                 if not url:
1975                     cipher = fmt.get('cipher')
1976                     if not cipher:
1977                         continue
1978                     url_data = compat_parse_qs(cipher)
1979                     url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1980                     if not url:
1981                         continue
1982                 else:
1983                     cipher = None
1984                     url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1985
1986                 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1987                 # Unsupported FORMAT_STREAM_TYPE_OTF
1988                 if stream_type == 3:
1989                     continue
1990
1991                 format_id = fmt.get('itag') or url_data['itag'][0]
1992                 if not format_id:
1993                     continue
1994                 format_id = compat_str(format_id)
1995
1996                 if cipher:
1997                     if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1998                         ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1999                         jsplayer_url_json = self._search_regex(
2000                             ASSETS_RE,
2001                             embed_webpage if age_gate else video_webpage,
2002                             'JS player URL (1)', default=None)
2003                         if not jsplayer_url_json and not age_gate:
2004                             # We need the embed website after all
2005                             if embed_webpage is None:
2006                                 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2007                                 embed_webpage = self._download_webpage(
2008                                     embed_url, video_id, 'Downloading embed webpage')
2009                             jsplayer_url_json = self._search_regex(
2010                                 ASSETS_RE, embed_webpage, 'JS player URL')
2011
2012                         player_url = json.loads(jsplayer_url_json)
2013                         if player_url is None:
2014                             player_url_json = self._search_regex(
2015                                 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2016                                 video_webpage, 'age gate player URL')
2017                             player_url = json.loads(player_url_json)
2018
2019                     if 'sig' in url_data:
2020                         url += '&signature=' + url_data['sig'][0]
2021                     elif 's' in url_data:
2022                         encrypted_sig = url_data['s'][0]
2023
2024                         if self._downloader.params.get('verbose'):
2025                             if player_url is None:
2026                                 player_version = 'unknown'
2027                                 player_desc = 'unknown'
2028                             else:
2029                                 if player_url.endswith('swf'):
2030                                     player_version = self._search_regex(
2031                                         r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
2032                                         'flash player', fatal=False)
2033                                     player_desc = 'flash player %s' % player_version
2034                                 else:
2035                                     player_version = self._search_regex(
2036                                         [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
2037                                          r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
2038                                         player_url,
2039                                         'html5 player', fatal=False)
2040                                     player_desc = 'html5 player %s' % player_version
2041
2042                             parts_sizes = self._signature_cache_id(encrypted_sig)
2043                             self.to_screen('{%s} signature length %s, %s' %
2044                                            (format_id, parts_sizes, player_desc))
2045
2046                         signature = self._decrypt_signature(
2047                             encrypted_sig, video_id, player_url, age_gate)
2048                         sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2049                         url += '&%s=%s' % (sp, signature)
2050                 if 'ratebypass' not in url:
2051                     url += '&ratebypass=yes'
2052
2053                 dct = {
2054                     'format_id': format_id,
2055                     'url': url,
2056                     'player_url': player_url,
2057                 }
2058                 if format_id in self._formats:
2059                     dct.update(self._formats[format_id])
2060                 if format_id in formats_spec:
2061                     dct.update(formats_spec[format_id])
2062
2063                 # Some itags are not included in DASH manifest thus corresponding formats will
2064                 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2065                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2066                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2067                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2068
2069                 if width is None:
2070                     width = int_or_none(fmt.get('width'))
2071                 if height is None:
2072                     height = int_or_none(fmt.get('height'))
2073
2074                 filesize = int_or_none(url_data.get(
2075                     'clen', [None])[0]) or _extract_filesize(url)
2076
2077                 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2078                 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2079
2080                 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2081                        or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2082                 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2083
2084                 more_fields = {
2085                     'filesize': filesize,
2086                     'tbr': tbr,
2087                     'width': width,
2088                     'height': height,
2089                     'fps': fps,
2090                     'format_note': quality_label or quality,
2091                 }
2092                 for key, value in more_fields.items():
2093                     if value:
2094                         dct[key] = value
2095                 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2096                 if type_:
2097                     type_split = type_.split(';')
2098                     kind_ext = type_split[0].split('/')
2099                     if len(kind_ext) == 2:
2100                         kind, _ = kind_ext
2101                         dct['ext'] = mimetype2ext(type_split[0])
2102                         if kind in ('audio', 'video'):
2103                             codecs = None
2104                             for mobj in re.finditer(
2105                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2106                                 if mobj.group('key') == 'codecs':
2107                                     codecs = mobj.group('val')
2108                                     break
2109                             if codecs:
2110                                 dct.update(parse_codecs(codecs))
2111                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2112                     dct['downloader_options'] = {
2113                         # Youtube throttles chunks >~10M
2114                         'http_chunk_size': 10485760,
2115                     }
2116                 formats.append(dct)
2117         else:
2118             manifest_url = (
2119                 url_or_none(try_get(
2120                     player_response,
2121                     lambda x: x['streamingData']['hlsManifestUrl'],
2122                     compat_str))
2123                 or url_or_none(try_get(
2124                     video_info, lambda x: x['hlsvp'][0], compat_str)))
2125             if manifest_url:
2126                 formats = []
2127                 m3u8_formats = self._extract_m3u8_formats(
2128                     manifest_url, video_id, 'mp4', fatal=False)
2129                 for a_format in m3u8_formats:
2130                     itag = self._search_regex(
2131                         r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2132                     if itag:
2133                         a_format['format_id'] = itag
2134                         if itag in self._formats:
2135                             dct = self._formats[itag].copy()
2136                             dct.update(a_format)
2137                             a_format = dct
2138                     a_format['player_url'] = player_url
2139                     # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2140                     a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2141                     formats.append(a_format)
2142             else:
2143                 error_message = extract_unavailable_message()
2144                 if not error_message:
2145                     error_message = clean_html(try_get(
2146                         player_response, lambda x: x['playabilityStatus']['reason'],
2147                         compat_str))
2148                 if not error_message:
2149                     error_message = clean_html(
2150                         try_get(video_info, lambda x: x['reason'][0], compat_str))
2151                 if error_message:
2152                     raise ExtractorError(error_message, expected=True)
2153                 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2154
2155         # uploader
2156         video_uploader = try_get(
2157             video_info, lambda x: x['author'][0],
2158             compat_str) or str_or_none(video_details.get('author'))
2159         if video_uploader:
2160             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2161         else:
2162             self._downloader.report_warning('unable to extract uploader name')
2163
2164         # uploader_id
2165         video_uploader_id = None
2166         video_uploader_url = None
2167         mobj = re.search(
2168             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2169             video_webpage)
2170         if mobj is not None:
2171             video_uploader_id = mobj.group('uploader_id')
2172             video_uploader_url = mobj.group('uploader_url')
2173         else:
2174             self._downloader.report_warning('unable to extract uploader nickname')
2175
2176         channel_id = (
2177             str_or_none(video_details.get('channelId'))
2178             or self._html_search_meta(
2179                 'channelId', video_webpage, 'channel id', default=None)
2180             or self._search_regex(
2181                 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2182                 video_webpage, 'channel id', default=None, group='id'))
2183         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2184
2185         # thumbnail image
2186         # We try first to get a high quality image:
2187         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2188                             video_webpage, re.DOTALL)
2189         if m_thumb is not None:
2190             video_thumbnail = m_thumb.group(1)
2191         elif 'thumbnail_url' not in video_info:
2192             self._downloader.report_warning('unable to extract video thumbnail')
2193             video_thumbnail = None
2194         else:   # don't panic if we can't find it
2195             video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2196
2197         # upload date
2198         upload_date = self._html_search_meta(
2199             'datePublished', video_webpage, 'upload date', default=None)
2200         if not upload_date:
2201             upload_date = self._search_regex(
2202                 [r'(?s)id="eow-date.*?>(.*?)</span>',
2203                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2204                 video_webpage, 'upload date', default=None)
2205         upload_date = unified_strdate(upload_date)
2206
2207         video_license = self._html_search_regex(
2208             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2209             video_webpage, 'license', default=None)
2210
2211         m_music = re.search(
2212             r'''(?x)
2213                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2214                 <ul[^>]*>\s*
2215                 <li>(?P<title>.+?)
2216                 by (?P<creator>.+?)
2217                 (?:
2218                     \(.+?\)|
2219                     <a[^>]*
2220                         (?:
2221                             \bhref=["\']/red[^>]*>|             # drop possible
2222                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
2223                         )
2224                     .*?
2225                 )?</li
2226             ''',
2227             video_webpage)
2228         if m_music:
2229             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2230             video_creator = clean_html(m_music.group('creator'))
2231         else:
2232             video_alt_title = video_creator = None
2233
2234         def extract_meta(field):
2235             return self._html_search_regex(
2236                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2237                 video_webpage, field, default=None)
2238
2239         track = extract_meta('Song')
2240         artist = extract_meta('Artist')
2241         album = extract_meta('Album')
2242
2243         # Youtube Music Auto-generated description
2244         release_date = release_year = None
2245         if video_description:
2246             mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2247             if mobj:
2248                 if not track:
2249                     track = mobj.group('track').strip()
2250                 if not artist:
2251                     artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2252                 if not album:
2253                     album = mobj.group('album'.strip())
2254                 release_year = mobj.group('release_year')
2255                 release_date = mobj.group('release_date')
2256                 if release_date:
2257                     release_date = release_date.replace('-', '')
2258                     if not release_year:
2259                         release_year = int(release_date[:4])
2260                 if release_year:
2261                     release_year = int(release_year)
2262
2263         m_episode = re.search(
2264             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2265             video_webpage)
2266         if m_episode:
2267             series = unescapeHTML(m_episode.group('series'))
2268             season_number = int(m_episode.group('season'))
2269             episode_number = int(m_episode.group('episode'))
2270         else:
2271             series = season_number = episode_number = None
2272
2273         m_cat_container = self._search_regex(
2274             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2275             video_webpage, 'categories', default=None)
2276         if m_cat_container:
2277             category = self._html_search_regex(
2278                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2279                 default=None)
2280             video_categories = None if category is None else [category]
2281         else:
2282             video_categories = None
2283
2284         video_tags = [
2285             unescapeHTML(m.group('content'))
2286             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2287
2288         def _extract_count(count_name):
2289             return str_to_int(self._search_regex(
2290                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2291                 % re.escape(count_name),
2292                 video_webpage, count_name, default=None))
2293
2294         like_count = _extract_count('like')
2295         dislike_count = _extract_count('dislike')
2296
2297         if view_count is None:
2298             view_count = str_to_int(self._search_regex(
2299                 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2300                 'view count', default=None))
2301
2302         average_rating = (
2303             float_or_none(video_details.get('averageRating'))
2304             or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2305
2306         # subtitles
2307         video_subtitles = self.extract_subtitles(video_id, video_webpage)
2308         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2309
2310         video_duration = try_get(
2311             video_info, lambda x: int_or_none(x['length_seconds'][0]))
2312         if not video_duration:
2313             video_duration = int_or_none(video_details.get('lengthSeconds'))
2314         if not video_duration:
2315             video_duration = parse_duration(self._html_search_meta(
2316                 'duration', video_webpage, 'video duration'))
2317
2318         # annotations
2319         video_annotations = None
2320         if self._downloader.params.get('writeannotations', False):
2321             xsrf_token = self._search_regex(
2322                 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2323                 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2324             invideo_url = try_get(
2325                 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2326             if xsrf_token and invideo_url:
2327                 xsrf_field_name = self._search_regex(
2328                     r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2329                     video_webpage, 'xsrf field name',
2330                     group='xsrf_field_name', default='session_token')
2331                 video_annotations = self._download_webpage(
2332                     self._proto_relative_url(invideo_url),
2333                     video_id, note='Downloading annotations',
2334                     errnote='Unable to download video annotations', fatal=False,
2335                     data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2336
2337         chapters = self._extract_chapters(description_original, video_duration)
2338
2339         # Look for the DASH manifest
2340         if self._downloader.params.get('youtube_include_dash_manifest', True):
2341             dash_mpd_fatal = True
2342             for mpd_url in dash_mpds:
2343                 dash_formats = {}
2344                 try:
2345                     def decrypt_sig(mobj):
2346                         s = mobj.group(1)
2347                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2348                         return '/signature/%s' % dec_s
2349
2350                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2351
2352                     for df in self._extract_mpd_formats(
2353                             mpd_url, video_id, fatal=dash_mpd_fatal,
2354                             formats_dict=self._formats):
2355                         if not df.get('filesize'):
2356                             df['filesize'] = _extract_filesize(df['url'])
2357                         # Do not overwrite DASH format found in some previous DASH manifest
2358                         if df['format_id'] not in dash_formats:
2359                             dash_formats[df['format_id']] = df
2360                         # Additional DASH manifests may end up in HTTP Error 403, therefore
2361                         # allow them to fail without a bug report message if some DASH
2362                         # manifest has already succeeded. This is a temporary workaround to
2363                         # reduce the burst of bug reports until we figure out the reason and
2364                         # whether it can be fixed at all.
2365                         dash_mpd_fatal = False
2366                 except (ExtractorError, KeyError) as e:
2367                     self.report_warning(
2368                         'Skipping DASH manifest: %r' % e, video_id)
2369                 if dash_formats:
2370                     # Remove the formats we found through non-DASH, they
2371                     # contain less info and it can be wrong, because we use
2372                     # fixed values (for example the resolution). See
2373                     # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2374                     # example.
2375                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2376                     formats.extend(dash_formats.values())
2377
2378         # Check for malformed aspect ratio
2379         stretched_m = re.search(
2380             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2381             video_webpage)
2382         if stretched_m:
2383             w = float(stretched_m.group('w'))
2384             h = float(stretched_m.group('h'))
2385             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2386             # We will only process correct ratios.
2387             if w > 0 and h > 0:
2388                 ratio = w / h
2389                 for f in formats:
2390                     if f.get('vcodec') != 'none':
2391                         f['stretched_ratio'] = ratio
2392
2393         if not formats:
2394             token = extract_token(video_info)
2395             if not token:
2396                 if 'reason' in video_info:
2397                     if 'The uploader has not made this video available in your country.' in video_info['reason']:
2398                         regions_allowed = self._html_search_meta(
2399                             'regionsAllowed', video_webpage, default=None)
2400                         countries = regions_allowed.split(',') if regions_allowed else None
2401                         self.raise_geo_restricted(
2402                             msg=video_info['reason'][0], countries=countries)
2403                     reason = video_info['reason'][0]
2404                     if 'Invalid parameters' in reason:
2405                         unavailable_message = extract_unavailable_message()
2406                         if unavailable_message:
2407                             reason = unavailable_message
2408                     raise ExtractorError(
2409                         'YouTube said: %s' % reason,
2410                         expected=True, video_id=video_id)
2411                 else:
2412                     raise ExtractorError(
2413                         '"token" parameter not in video info for unknown reason',
2414                         video_id=video_id)
2415
2416         if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
2417             raise ExtractorError('This video is DRM protected.', expected=True)
2418
2419         self._sort_formats(formats)
2420
2421         self.mark_watched(video_id, video_info, player_response)
2422
2423         return {
2424             'id': video_id,
2425             'uploader': video_uploader,
2426             'uploader_id': video_uploader_id,
2427             'uploader_url': video_uploader_url,
2428             'channel_id': channel_id,
2429             'channel_url': channel_url,
2430             'upload_date': upload_date,
2431             'license': video_license,
2432             'creator': video_creator or artist,
2433             'title': video_title,
2434             'alt_title': video_alt_title or track,
2435             'thumbnail': video_thumbnail,
2436             'description': video_description,
2437             'categories': video_categories,
2438             'tags': video_tags,
2439             'subtitles': video_subtitles,
2440             'automatic_captions': automatic_captions,
2441             'duration': video_duration,
2442             'age_limit': 18 if age_gate else 0,
2443             'annotations': video_annotations,
2444             'chapters': chapters,
2445             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2446             'view_count': view_count,
2447             'like_count': like_count,
2448             'dislike_count': dislike_count,
2449             'average_rating': average_rating,
2450             'formats': formats,
2451             'is_live': is_live,
2452             'start_time': start_time,
2453             'end_time': end_time,
2454             'series': series,
2455             'season_number': season_number,
2456             'episode_number': episode_number,
2457             'track': track,
2458             'artist': artist,
2459             'album': album,
2460             'release_date': release_date,
2461             'release_year': release_year,
2462         }
2463
2464
2465 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2466     IE_DESC = 'YouTube.com playlists'
2467     _VALID_URL = r"""(?x)(?:
2468                         (?:https?://)?
2469                         (?:\w+\.)?
2470                         (?:
2471                             (?:
2472                                 youtube(?:kids)?\.com|
2473                                 invidio\.us
2474                             )
2475                             /
2476                             (?:
2477                                (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2478                                \? (?:.*?[&;])*? (?:p|a|list)=
2479                             |  p/
2480                             )|
2481                             youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2482                         )
2483                         (
2484                             (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2485                             # Top tracks, they can also include dots
2486                             |(?:MC)[\w\.]*
2487                         )
2488                         .*
2489                      |
2490                         (%(playlist_id)s)
2491                      )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2492     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2493     _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2494     _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
2495     IE_NAME = 'youtube:playlist'
2496     _TESTS = [{
2497         'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2498         'info_dict': {
2499             'title': 'ytdl test PL',
2500             'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2501         },
2502         'playlist_count': 3,
2503     }, {
2504         'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2505         'info_dict': {
2506             'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2507             'title': 'YDL_Empty_List',
2508         },
2509         'playlist_count': 0,
2510         'skip': 'This playlist is private',
2511     }, {
2512         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2513         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2514         'info_dict': {
2515             'title': '29C3: Not my department',
2516             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2517             'uploader': 'Christiaan008',
2518             'uploader_id': 'ChRiStIaAn008',
2519         },
2520         'playlist_count': 95,
2521     }, {
2522         'note': 'issue #673',
2523         'url': 'PLBB231211A4F62143',
2524         'info_dict': {
2525             'title': '[OLD]Team Fortress 2 (Class-based LP)',
2526             'id': 'PLBB231211A4F62143',
2527             'uploader': 'Wickydoo',
2528             'uploader_id': 'Wickydoo',
2529         },
2530         'playlist_mincount': 26,
2531     }, {
2532         'note': 'Large playlist',
2533         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2534         'info_dict': {
2535             'title': 'Uploads from Cauchemar',
2536             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2537             'uploader': 'Cauchemar',
2538             'uploader_id': 'Cauchemar89',
2539         },
2540         'playlist_mincount': 799,
2541     }, {
2542         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2543         'info_dict': {
2544             'title': 'YDL_safe_search',
2545             'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2546         },
2547         'playlist_count': 2,
2548         'skip': 'This playlist is private',
2549     }, {
2550         'note': 'embedded',
2551         'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2552         'playlist_count': 4,
2553         'info_dict': {
2554             'title': 'JODA15',
2555             'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2556             'uploader': 'milan',
2557             'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
2558         }
2559     }, {
2560         'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2561         'playlist_mincount': 485,
2562         'info_dict': {
2563             'title': '2018 Chinese New Singles (11/6 updated)',
2564             'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2565             'uploader': 'LBK',
2566             'uploader_id': 'sdragonfang',
2567         }
2568     }, {
2569         'note': 'Embedded SWF player',
2570         'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2571         'playlist_count': 4,
2572         'info_dict': {
2573             'title': 'JODA7',
2574             'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2575         },
2576         'skip': 'This playlist does not exist',
2577     }, {
2578         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2579         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2580         'info_dict': {
2581             'title': 'Uploads from Interstellar Movie',
2582             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2583             'uploader': 'Interstellar Movie',
2584             'uploader_id': 'InterstellarMovie1',
2585         },
2586         'playlist_mincount': 21,
2587     }, {
2588         # Playlist URL that does not actually serve a playlist
2589         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2590         'info_dict': {
2591             'id': 'FqZTN594JQw',
2592             'ext': 'webm',
2593             'title': "Smiley's People 01 detective, Adventure Series, Action",
2594             'uploader': 'STREEM',
2595             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2596             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2597             'upload_date': '20150526',
2598             'license': 'Standard YouTube License',
2599             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2600             'categories': ['People & Blogs'],
2601             'tags': list,
2602             'view_count': int,
2603             'like_count': int,
2604             'dislike_count': int,
2605         },
2606         'params': {
2607             'skip_download': True,
2608         },
2609         'skip': 'This video is not available.',
2610         'add_ie': [YoutubeIE.ie_key()],
2611     }, {
2612         'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2613         'info_dict': {
2614             'id': 'yeWKywCrFtk',
2615             'ext': 'mp4',
2616             'title': 'Small Scale Baler and Braiding Rugs',
2617             'uploader': 'Backus-Page House Museum',
2618             'uploader_id': 'backuspagemuseum',
2619             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2620             'upload_date': '20161008',
2621             'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2622             'categories': ['Nonprofits & Activism'],
2623             'tags': list,
2624             'like_count': int,
2625             'dislike_count': int,
2626         },
2627         'params': {
2628             'noplaylist': True,
2629             'skip_download': True,
2630         },
2631     }, {
2632         # https://github.com/ytdl-org/youtube-dl/issues/21844
2633         'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2634         'info_dict': {
2635             'title': 'Data Analysis with Dr Mike Pound',
2636             'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2637             'uploader_id': 'Computerphile',
2638             'uploader': 'Computerphile',
2639         },
2640         'playlist_mincount': 11,
2641     }, {
2642         'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2643         'only_matching': True,
2644     }, {
2645         'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2646         'only_matching': True,
2647     }, {
2648         # music album playlist
2649         'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2650         'only_matching': True,
2651     }, {
2652         'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2653         'only_matching': True,
2654     }, {
2655         'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2656         'only_matching': True,
2657     }]
2658
    def _real_initialize(self):
        """Initialize the extractor; the only step performed is self._login()."""
        self._login()
2661
2662     def extract_videos_from_page(self, page):
2663         ids_in_page = []
2664         titles_in_page = []
2665
2666         for item in re.findall(
2667                 r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
2668             attrs = extract_attributes(item)
2669             video_id = attrs['data-video-id']
2670             video_title = unescapeHTML(attrs.get('data-title'))
2671             if video_title:
2672                 video_title = video_title.strip()
2673             ids_in_page.append(video_id)
2674             titles_in_page.append(video_title)
2675
2676         # Fallback with old _VIDEO_RE
2677         self.extract_videos_from_page_impl(
2678             self._VIDEO_RE, page, ids_in_page, titles_in_page)
2679
2680         # Relaxed fallbacks
2681         self.extract_videos_from_page_impl(
2682             r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
2683             ids_in_page, titles_in_page)
2684         self.extract_videos_from_page_impl(
2685             r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
2686             ids_in_page, titles_in_page)
2687
2688         return zip(ids_in_page, titles_in_page)
2689
2690     def _extract_mix(self, playlist_id):
2691         # The mixes are generated from a single video
2692         # the id of the playlist is just 'RD' + video_id
2693         ids = []
2694         last_id = playlist_id[-11:]
2695         for n in itertools.count(1):
2696             url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2697             webpage = self._download_webpage(
2698                 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2699             new_ids = orderedSet(re.findall(
2700                 r'''(?xs)data-video-username=".*?".*?
2701                            href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
2702                 webpage))
2703             # Fetch new pages until all the videos are repeated, it seems that
2704             # there are always 51 unique videos.
2705             new_ids = [_id for _id in new_ids if _id not in ids]
2706             if not new_ids:
2707                 break
2708             ids.extend(new_ids)
2709             last_id = ids[-1]
2710
2711         url_results = self._ids_to_results(ids)
2712
2713         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2714         title_span = (
2715             search_title('playlist-title')
2716             or search_title('title long-title')
2717             or search_title('title'))
2718         title = clean_html(title_span)
2719
2720         return self.playlist_result(url_results, playlist_id, title)
2721
2722     def _extract_playlist(self, playlist_id):
2723         url = self._TEMPLATE_URL % playlist_id
2724         page = self._download_webpage(url, playlist_id)
2725
2726         # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2727         for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2728             match = match.strip()
2729             # Check if the playlist exists or is private
2730             mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2731             if mobj:
2732                 reason = mobj.group('reason')
2733                 message = 'This playlist %s' % reason
2734                 if 'private' in reason:
2735                     message += ', use --username or --netrc to access it'
2736                 message += '.'
2737                 raise ExtractorError(message, expected=True)
2738             elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2739                 raise ExtractorError(
2740                     'Invalid parameters. Maybe URL is incorrect.',
2741                     expected=True)
2742             elif re.match(r'[^<]*Choose your language[^<]*', match):
2743                 continue
2744             else:
2745                 self.report_warning('Youtube gives an alert message: ' + match)
2746
2747         playlist_title = self._html_search_regex(
2748             r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2749             page, 'title', default=None)
2750
2751         _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2752         uploader = self._html_search_regex(
2753             r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2754             page, 'uploader', default=None)
2755         mobj = re.search(
2756             r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2757             page)
2758         if mobj:
2759             uploader_id = mobj.group('uploader_id')
2760             uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2761         else:
2762             uploader_id = uploader_url = None
2763
2764         has_videos = True
2765
2766         if not playlist_title:
2767             try:
2768                 # Some playlist URLs don't actually serve a playlist (e.g.
2769                 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2770                 next(self._entries(page, playlist_id))
2771             except StopIteration:
2772                 has_videos = False
2773
2774         playlist = self.playlist_result(
2775             self._entries(page, playlist_id), playlist_id, playlist_title)
2776         playlist.update({
2777             'uploader': uploader,
2778             'uploader_id': uploader_id,
2779             'uploader_url': uploader_url,
2780         })
2781
2782         return has_videos, playlist
2783
2784     def _check_download_just_video(self, url, playlist_id):
2785         # Check if it's a video-specific URL
2786         query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2787         video_id = query_dict.get('v', [None])[0] or self._search_regex(
2788             r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2789             'video id', default=None)
2790         if video_id:
2791             if self._downloader.params.get('noplaylist'):
2792                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2793                 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2794             else:
2795                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2796                 return video_id, None
2797         return None, None
2798
2799     def _real_extract(self, url):
2800         # Extract playlist id
2801         mobj = re.match(self._VALID_URL, url)
2802         if mobj is None:
2803             raise ExtractorError('Invalid URL: %s' % url)
2804         playlist_id = mobj.group(1) or mobj.group(2)
2805
2806         video_id, video = self._check_download_just_video(url, playlist_id)
2807         if video:
2808             return video
2809
2810         if playlist_id.startswith(('RD', 'UL', 'PU')):
2811             # Mixes require a custom extraction process
2812             return self._extract_mix(playlist_id)
2813
2814         has_videos, playlist = self._extract_playlist(playlist_id)
2815         if has_videos or not video_id:
2816             return playlist
2817
2818         # Some playlist URLs don't actually serve a playlist (see
2819         # https://github.com/ytdl-org/youtube-dl/issues/10537).
2820         # Fallback to plain video extraction if there is a video id
2821         # along with playlist id.
2822         return self.url_result(video_id, 'Youtube', video_id=video_id)
2823
2824
2825 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2826     IE_DESC = 'YouTube.com channels'
2827     _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
2828     _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2829     _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2830     IE_NAME = 'youtube:channel'
2831     _TESTS = [{
2832         'note': 'paginated channel',
2833         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2834         'playlist_mincount': 91,
2835         'info_dict': {
2836             'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2837             'title': 'Uploads from lex will',
2838             'uploader': 'lex will',
2839             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2840         }
2841     }, {
2842         'note': 'Age restricted channel',
2843         # from https://www.youtube.com/user/DeusExOfficial
2844         'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2845         'playlist_mincount': 64,
2846         'info_dict': {
2847             'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2848             'title': 'Uploads from Deus Ex',
2849             'uploader': 'Deus Ex',
2850             'uploader_id': 'DeusExOfficial',
2851         },
2852     }, {
2853         'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
2854         'only_matching': True,
2855     }, {
2856         'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
2857         'only_matching': True,
2858     }]
2859
2860     @classmethod
2861     def suitable(cls, url):
2862         return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
2863                 else super(YoutubeChannelIE, cls).suitable(url))
2864
    def _build_template_url(self, url, channel_id):
        """Return _TEMPLATE_URL filled in with channel_id; the url argument is not used here."""
        return self._TEMPLATE_URL % channel_id
2867
2868     def _real_extract(self, url):
2869         channel_id = self._match_id(url)
2870
2871         url = self._build_template_url(url, channel_id)
2872
2873         # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
2874         # Workaround by extracting as a playlist if managed to obtain channel playlist URL
2875         # otherwise fallback on channel by page extraction
2876         channel_page = self._download_webpage(
2877             url + '?view=57', channel_id,
2878             'Downloading channel page', fatal=False)
2879         if channel_page is False:
2880             channel_playlist_id = False
2881         else:
2882             channel_playlist_id = self._html_search_meta(
2883                 'channelId', channel_page, 'channel id', default=None)
2884             if not channel_playlist_id:
2885                 channel_url = self._html_search_meta(
2886                     ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
2887                     channel_page, 'channel url', default=None)
2888                 if channel_url:
2889                     channel_playlist_id = self._search_regex(
2890                         r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
2891                         channel_url, 'channel id', default=None)
2892         if channel_playlist_id and channel_playlist_id.startswith('UC'):
2893             playlist_id = 'UU' + channel_playlist_id[2:]
2894             return self.url_result(
2895                 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
2896
2897         channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
2898         autogenerated = re.search(r'''(?x)
2899                 class="[^"]*?(?:
2900                     channel-header-autogenerated-label|
2901                     yt-channel-title-autogenerated
2902                 )[^"]*"''', channel_page) is not None
2903
2904         if autogenerated:
2905             # The videos are contained in a single page
2906             # the ajax pages can't be used, they are empty
2907             entries = [
2908                 self.url_result(
2909                     video_id, 'Youtube', video_id=video_id,
2910                     video_title=video_title)
2911                 for video_id, video_title in self.extract_videos_from_page(channel_page)]
2912             return self.playlist_result(entries, channel_id)
2913
2914         try:
2915             next(self._entries(channel_page, channel_id))
2916         except StopIteration:
2917             alert_message = self._html_search_regex(
2918                 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
2919                 channel_page, 'alert', default=None, group='alert')
2920             if alert_message:
2921                 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
2922
2923         return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2924
2925
class YoutubeUserIE(YoutubeChannelIE):
    """Channel-style extractor for /user/, /c/, bare-name URLs and the "ytuser:" keyword."""
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept url only if no other Youtube*IE in this module accepts it.

        The regex of this extractor is very permissive and would otherwise
        also match URLs that a more specific YouTube extractor can handle.
        """
        for name, klass in globals().items():
            if klass is cls:
                continue
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Fill _TEMPLATE_URL with the path kind and id parsed from url.

        The path kind is the 'user' group of _VALID_URL ('user' or 'c'),
        defaulting to 'user' when the group did not take part in the match.
        """
        match = re.match(self._VALID_URL, url)
        path_kind = match.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, match.group('id'))
2980
2981
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Resolve a ".../live" URL to the video it currently points at."""
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the live video, or for the base URL.

        The video is chosen only when the page could be downloaded, its
        og:type starts with 'video' and its videoId meta value is an
        11-character id; in every other case the base URL (the URL without
        the trailing /live) is returned instead.
        """
        match = re.match(self._VALID_URL, url)
        channel_id = match.group('id')
        base_url = match.group('base_url')

        # A failed download is tolerated and falls through to the base URL
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        is_video_page = (
            page_type.startswith('video')
            and video_id
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if is_video_page:
            return self.url_result(video_id, YoutubeIE.ie_key())

        return self.url_result(base_url)
3032
3033
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    # Extractor for the "playlists" page of a user or channel.  Only
    # attributes are declared here; no methods are overridden, so all
    # behaviour comes from YoutubePlaylistsBaseInfoExtractor.
    IE_DESC = 'YouTube.com user/channel playlists'
    # Matches /user/<id>/playlists and /channel/<id>/playlists on youtube.com
    # (any subdomain); <id> is everything up to the next slash.
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }]
3063
3064
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    # Overrides only _VIDEO_RE: a /watch?v= link with an 11-character id and,
    # optionally, a title="..." attribute later in the same tag.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
3067
3068
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string arguments merged into every search request;
    # subclasses override this to alter the search.
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are downloaded one after another, following the "Next"
        link of each page, until at least n videos have been collected, a
        page yields no videos, or no "Next" link is found.

        Returns a playlist result holding at most n entries, with query as
        its id.

        Raises ExtractorError (expected) when a downloaded page contains a
        "search-message" element.
        """

        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough results are available.  Using '>=' (not
            # '>') avoids requesting one more page when exactly n videos have
            # been collected; that extra page is wasted work and could even
            # abort the search through the "search-message" check above.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # Slice only when there is a surplus: n is not guaranteed to be an
        # integer (presumably it can be _MAX_RESULTS, i.e. float('inf') --
        # verify against SearchInfoExtractor), and a float is not a valid
        # slice bound.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
3117
3118
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Variant of YoutubeSearchIE with its own search key; the only functional
    # difference declared here is the extra 'search_sort' query argument.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
3124
3125
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extract the videos listed on a YouTube results page given by its URL."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the results page and return its videos as a playlist.

        The decoded search query serves both as the download id and as the
        playlist title.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        webpage = self._download_webpage(url, query)
        entries = self._process_page(webpage)
        return self.playlist_result(entries, playlist_title=query)
3146
3147
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extract a show by delegating to the parent with the show's playlists URL."""
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Rewrite url to the show's /playlists page and let the parent extract it."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
3165
3166
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.

    _FEED_NAME is used to build both the extractor name and the
    https://www.youtube.com/feed/<name> URL; _PLAYLIST_TITLE is used as the
    title of the resulting playlist and as the identifier passed to the
    download calls.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _entries(self, page):
        """Yield results for every distinct video found in the feed.

        `page` is the HTML of the first feed page. Further portions are
        fetched through the "load more" link for as long as one is present
        and the latest portion contributed at least one unseen video id.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        # A set keeps the "already seen" test constant-time no matter how
        # many portions have been processed so far.
        seen_ids = set()
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            # The JSON reply carries the next portion of video markup and the
            # markup of the next "load more" widget separately.
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and return its videos as a playlist.

        The feed URL is built from _FEED_NAME; `url` itself is not used.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
3218
3219
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the logged-in user's "Watch later" (WL) list."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a single-video result if one is produced, else the WL playlist.

        Both helpers return a pair of which only the second item is used
        here; presumably the first is a status flag -- verify against the
        parent class.
        """
        _ignored, single_video = self._check_download_just_video(url, 'WL')
        if not single_video:
            _ignored, watch_later = self._extract_playlist('WL')
            return watch_later
        return single_video
3239
3240
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the user's favourites page to its underlying playlist."""

    _LOGIN_REQUIRED = True
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        """Find the favourites playlist id and hand it to 'YoutubePlaylist'.

        The my_favorites page is always the one downloaded; `url` itself is
        not used.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(list_id, 'YoutubePlaylist')
3251
3252
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'recommended' feed."""

    # Values required by the feed base class.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
3258
3259
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'subscriptions' feed."""

    # The advertised shortcut includes the leading colon, because that is
    # the only form _VALID_URL below accepts.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    # Values required by the feed base class.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
3265
3266
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'history' feed."""

    # Values required by the feed base class.
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
3272
3273
class YoutubeTruncatedURLIE(InfoExtractor):
    """Match watch/attribution URLs that lack a video id and explain why.

    Extraction always fails with an expected error telling the user to
    quote the URL.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with quoting advice."""
        advice = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(advice, expected=True)
3321
3322
class YoutubeTruncatedIDIE(InfoExtractor):
    """Match watch URLs whose video id is 1 to 10 characters long and report it.

    Extraction always fails with an expected error naming the incomplete id.
    """

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError for the incomplete id."""
        partial_id = self._match_id(url)
        complaint = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(complaint, expected=True)