d9b13adc211d3623d3594e16ccdb0f4454fe4d4c
[ytdl] / youtube_dl / extractor / peertube.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9     int_or_none,
10     parse_resolution,
11     str_or_none,
12     try_get,
13     unified_timestamp,
14     url_or_none,
15     urljoin,
16 )
17
18
19 class PeerTubeIE(InfoExtractor):
20     _INSTANCES_RE = r'''(?:
21                             # Taken from https://instances.joinpeertube.org/instances
22                             peertube\.rainbowswingers\.net|
23                             tube\.stanisic\.nl|
24                             peer\.suiri\.us|
25                             medias\.libox\.fr|
26                             videomensoif\.ynh\.fr|
27                             peertube\.travelpandas\.eu|
28                             peertube\.rachetjay\.fr|
29                             peertube\.montecsys\.fr|
30                             tube\.eskuero\.me|
31                             peer\.tube|
32                             peertube\.umeahackerspace\.se|
33                             tube\.nx-pod\.de|
34                             video\.monsieurbidouille\.fr|
35                             tube\.openalgeria\.org|
36                             vid\.lelux\.fi|
37                             video\.anormallostpod\.ovh|
38                             tube\.crapaud-fou\.org|
39                             peertube\.stemy\.me|
40                             lostpod\.space|
41                             exode\.me|
42                             peertube\.snargol\.com|
43                             vis\.ion\.ovh|
44                             videosdulib\.re|
45                             v\.mbius\.io|
46                             videos\.judrey\.eu|
47                             peertube\.osureplayviewer\.xyz|
48                             peertube\.mathieufamily\.ovh|
49                             www\.videos-libr\.es|
50                             fightforinfo\.com|
51                             peertube\.fediverse\.ru|
52                             peertube\.oiseauroch\.fr|
53                             video\.nesven\.eu|
54                             v\.bearvideo\.win|
55                             video\.qoto\.org|
56                             justporn\.cc|
57                             video\.vny\.fr|
58                             peervideo\.club|
59                             tube\.taker\.fr|
60                             peertube\.chantierlibre\.org|
61                             tube\.ipfixe\.info|
62                             tube\.kicou\.info|
63                             tube\.dodsorf\.as|
64                             videobit\.cc|
65                             video\.yukari\.moe|
66                             videos\.elbinario\.net|
67                             hkvideo\.live|
68                             pt\.tux\.tf|
69                             www\.hkvideo\.live|
70                             FIGHTFORINFO\.com|
71                             pt\.765racing\.com|
72                             peertube\.gnumeria\.eu\.org|
73                             nordenmedia\.com|
74                             peertube\.co\.uk|
75                             tube\.darfweb\.eu|
76                             tube\.kalah-france\.org|
77                             0ch\.in|
78                             vod\.mochi\.academy|
79                             film\.node9\.org|
80                             peertube\.hatthieves\.es|
81                             video\.fitchfamily\.org|
82                             peertube\.ddns\.net|
83                             video\.ifuncle\.kr|
84                             video\.fdlibre\.eu|
85                             tube\.22decembre\.eu|
86                             peertube\.harmoniescreatives\.com|
87                             tube\.fabrigli\.fr|
88                             video\.thedwyers\.co|
89                             video\.bruitbruit\.com|
90                             peertube\.foxfam\.club|
91                             peer\.philoxweb\.be|
92                             videos\.bugs\.social|
93                             peertube\.malbert\.xyz|
94                             peertube\.bilange\.ca|
95                             libretube\.net|
96                             diytelevision\.com|
97                             peertube\.fedilab\.app|
98                             libre\.video|
99                             video\.mstddntfdn\.online|
100                             us\.tv|
101                             peertube\.sl-network\.fr|
102                             peertube\.dynlinux\.io|
103                             peertube\.david\.durieux\.family|
104                             peertube\.linuxrocks\.online|
105                             peerwatch\.xyz|
106                             v\.kretschmann\.social|
107                             tube\.otter\.sh|
108                             yt\.is\.nota\.live|
109                             tube\.dragonpsi\.xyz|
110                             peertube\.boneheadmedia\.com|
111                             videos\.funkwhale\.audio|
112                             watch\.44con\.com|
113                             peertube\.gcaillaut\.fr|
114                             peertube\.icu|
115                             pony\.tube|
116                             spacepub\.space|
117                             tube\.stbr\.io|
118                             v\.mom-gay\.faith|
119                             tube\.port0\.xyz|
120                             peertube\.simounet\.net|
121                             play\.jergefelt\.se|
122                             peertube\.zeteo\.me|
123                             tube\.danq\.me|
124                             peertube\.kerenon\.com|
125                             tube\.fab-l3\.org|
126                             tube\.calculate\.social|
127                             peertube\.mckillop\.org|
128                             tube\.netzspielplatz\.de|
129                             vod\.ksite\.de|
130                             peertube\.laas\.fr|
131                             tube\.govital\.net|
132                             peertube\.stephenson\.cc|
133                             bistule\.nohost\.me|
134                             peertube\.kajalinifi\.de|
135                             video\.ploud\.jp|
136                             video\.omniatv\.com|
137                             peertube\.ffs2play\.fr|
138                             peertube\.leboulaire\.ovh|
139                             peertube\.tronic-studio\.com|
140                             peertube\.public\.cat|
141                             peertube\.metalbanana\.net|
142                             video\.1000i100\.fr|
143                             peertube\.alter-nativ-voll\.de|
144                             tube\.pasa\.tf|
145                             tube\.worldofhauru\.xyz|
146                             pt\.kamp\.site|
147                             peertube\.teleassist\.fr|
148                             videos\.mleduc\.xyz|
149                             conf\.tube|
150                             media\.privacyinternational\.org|
151                             pt\.forty-two\.nl|
152                             video\.halle-leaks\.de|
153                             video\.grosskopfgames\.de|
154                             peertube\.schaeferit\.de|
155                             peertube\.jackbot\.fr|
156                             tube\.extinctionrebellion\.fr|
157                             peertube\.f-si\.org|
158                             video\.subak\.ovh|
159                             videos\.koweb\.fr|
160                             peertube\.zergy\.net|
161                             peertube\.roflcopter\.fr|
162                             peertube\.floss-marketing-school\.com|
163                             vloggers\.social|
164                             peertube\.iriseden\.eu|
165                             videos\.ubuntu-paris\.org|
166                             peertube\.mastodon\.host|
167                             armstube\.com|
168                             peertube\.s2s\.video|
169                             peertube\.lol|
170                             tube\.open-plug\.eu|
171                             open\.tube|
172                             peertube\.ch|
173                             peertube\.normandie-libre\.fr|
174                             peertube\.slat\.org|
175                             video\.lacaveatonton\.ovh|
176                             peertube\.uno|
177                             peertube\.servebeer\.com|
178                             peertube\.fedi\.quebec|
179                             tube\.h3z\.jp|
180                             tube\.plus200\.com|
181                             peertube\.eric\.ovh|
182                             tube\.metadocs\.cc|
183                             tube\.unmondemeilleur\.eu|
184                             gouttedeau\.space|
185                             video\.antirep\.net|
186                             nrop\.cant\.at|
187                             tube\.ksl-bmx\.de|
188                             tube\.plaf\.fr|
189                             tube\.tchncs\.de|
190                             video\.devinberg\.com|
191                             hitchtube\.fr|
192                             peertube\.kosebamse\.com|
193                             yunopeertube\.myddns\.me|
194                             peertube\.varney\.fr|
195                             peertube\.anon-kenkai\.com|
196                             tube\.maiti\.info|
197                             tubee\.fr|
198                             videos\.dinofly\.com|
199                             toobnix\.org|
200                             videotape\.me|
201                             voca\.tube|
202                             video\.heromuster\.com|
203                             video\.lemediatv\.fr|
204                             video\.up\.edu\.ph|
205                             balafon\.video|
206                             video\.ivel\.fr|
207                             thickrips\.cloud|
208                             pt\.laurentkruger\.fr|
209                             video\.monarch-pass\.net|
210                             peertube\.artica\.center|
211                             video\.alternanet\.fr|
212                             indymotion\.fr|
213                             fanvid\.stopthatimp\.net|
214                             video\.farci\.org|
215                             v\.lesterpig\.com|
216                             video\.okaris\.de|
217                             tube\.pawelko\.net|
218                             peertube\.mablr\.org|
219                             tube\.fede\.re|
220                             pytu\.be|
221                             evertron\.tv|
222                             devtube\.dev-wiki\.de|
223                             raptube\.antipub\.org|
224                             video\.selea\.se|
225                             peertube\.mygaia\.org|
226                             video\.oh14\.de|
227                             peertube\.livingutopia\.org|
228                             peertube\.the-penguin\.de|
229                             tube\.thechangebook\.org|
230                             tube\.anjara\.eu|
231                             pt\.pube\.tk|
232                             video\.samedi\.pm|
233                             mplayer\.demouliere\.eu|
234                             widemus\.de|
235                             peertube\.me|
236                             peertube\.zapashcanon\.fr|
237                             video\.latavernedejohnjohn\.fr|
238                             peertube\.pcservice46\.fr|
239                             peertube\.mazzonetto\.eu|
240                             video\.irem\.univ-paris-diderot\.fr|
241                             video\.livecchi\.cloud|
242                             alttube\.fr|
243                             video\.coop\.tools|
244                             video\.cabane-libre\.org|
245                             peertube\.openstreetmap\.fr|
246                             videos\.alolise\.org|
247                             irrsinn\.video|
248                             video\.antopie\.org|
249                             scitech\.video|
250                             tube2\.nemsia\.org|
251                             video\.amic37\.fr|
252                             peertube\.freeforge\.eu|
253                             video\.arbitrarion\.com|
254                             video\.datsemultimedia\.com|
255                             stoptrackingus\.tv|
256                             peertube\.ricostrongxxx\.com|
257                             docker\.videos\.lecygnenoir\.info|
258                             peertube\.togart\.de|
259                             tube\.postblue\.info|
260                             videos\.domainepublic\.net|
261                             peertube\.cyber-tribal\.com|
262                             video\.gresille\.org|
263                             peertube\.dsmouse\.net|
264                             cinema\.yunohost\.support|
265                             tube\.theocevaer\.fr|
266                             repro\.video|
267                             tube\.4aem\.com|
268                             quaziinc\.com|
269                             peertube\.metawurst\.space|
270                             videos\.wakapo\.com|
271                             video\.ploud\.fr|
272                             video\.freeradical\.zone|
273                             tube\.valinor\.fr|
274                             refuznik\.video|
275                             pt\.kircheneuenburg\.de|
276                             peertube\.asrun\.eu|
277                             peertube\.lagob\.fr|
278                             videos\.side-ways\.net|
279                             91video\.online|
280                             video\.valme\.io|
281                             video\.taboulisme\.com|
282                             videos-libr\.es|
283                             tv\.mooh\.fr|
284                             nuage\.acostey\.fr|
285                             video\.monsieur-a\.fr|
286                             peertube\.librelois\.fr|
287                             videos\.pair2jeux\.tube|
288                             videos\.pueseso\.club|
289                             peer\.mathdacloud\.ovh|
290                             media\.assassinate-you\.net|
291                             vidcommons\.org|
292                             ptube\.rousset\.nom\.fr|
293                             tube\.cyano\.at|
294                             videos\.squat\.net|
295                             video\.iphodase\.fr|
296                             peertube\.makotoworkshop\.org|
297                             peertube\.serveur\.slv-valbonne\.fr|
298                             vault\.mle\.party|
299                             hostyour\.tv|
300                             videos\.hack2g2\.fr|
301                             libre\.tube|
302                             pire\.artisanlogiciel\.net|
303                             videos\.numerique-en-commun\.fr|
304                             video\.netsyms\.com|
305                             video\.die-partei\.social|
306                             video\.writeas\.org|
307                             peertube\.swarm\.solvingmaz\.es|
308                             tube\.pericoloso\.ovh|
309                             watching\.cypherpunk\.observer|
310                             videos\.adhocmusic\.com|
311                             tube\.rfc1149\.net|
312                             peertube\.librelabucm\.org|
313                             videos\.numericoop\.fr|
314                             peertube\.koehn\.com|
315                             peertube\.anarchmusicall\.net|
316                             tube\.kampftoast\.de|
317                             vid\.y-y\.li|
318                             peertube\.xtenz\.xyz|
319                             diode\.zone|
320                             tube\.egf\.mn|
321                             peertube\.nomagic\.uk|
322                             visionon\.tv|
323                             videos\.koumoul\.com|
324                             video\.rastapuls\.com|
325                             video\.mantlepro\.com|
326                             video\.deadsuperhero\.com|
327                             peertube\.musicstudio\.pro|
328                             peertube\.we-keys\.fr|
329                             artitube\.artifaille\.fr|
330                             peertube\.ethernia\.net|
331                             tube\.midov\.pl|
332                             peertube\.fr|
333                             watch\.snoot\.tube|
334                             peertube\.donnadieu\.fr|
335                             argos\.aquilenet\.fr|
336                             tube\.nemsia\.org|
337                             tube\.bruniau\.net|
338                             videos\.darckoune\.moe|
339                             tube\.traydent\.info|
340                             dev\.videos\.lecygnenoir\.info|
341                             peertube\.nayya\.org|
342                             peertube\.live|
343                             peertube\.mofgao\.space|
344                             video\.lequerrec\.eu|
345                             peertube\.amicale\.net|
346                             aperi\.tube|
347                             tube\.ac-lyon\.fr|
348                             video\.lw1\.at|
349                             www\.yiny\.org|
350                             videos\.pofilo\.fr|
351                             tube\.lou\.lt|
352                             choob\.h\.etbus\.ch|
353                             tube\.hoga\.fr|
354                             peertube\.heberge\.fr|
355                             video\.obermui\.de|
356                             videos\.cloudfrancois\.fr|
357                             betamax\.video|
358                             video\.typica\.us|
359                             tube\.piweb\.be|
360                             video\.blender\.org|
361                             peertube\.cat|
362                             tube\.kdy\.ch|
363                             pe\.ertu\.be|
364                             peertube\.social|
365                             videos\.lescommuns\.org|
366                             tv\.datamol\.org|
367                             videonaute\.fr|
368                             dialup\.express|
369                             peertube\.nogafa\.org|
370                             megatube\.lilomoino\.fr|
371                             peertube\.tamanoir\.foucry\.net|
372                             peertube\.devosi\.org|
373                             peertube\.1312\.media|
374                             tube\.bootlicker\.party|
375                             skeptikon\.fr|
376                             video\.blueline\.mg|
377                             tube\.homecomputing\.fr|
378                             tube\.ouahpiti\.info|
379                             video\.tedomum\.net|
380                             video\.g3l\.org|
381                             fontube\.fr|
382                             peertube\.gaialabs\.ch|
383                             tube\.kher\.nl|
384                             peertube\.qtg\.fr|
385                             video\.migennes\.net|
386                             tube\.p2p\.legal|
387                             troll\.tv|
388                             videos\.iut-orsay\.fr|
389                             peertube\.solidev\.net|
390                             videos\.cemea\.org|
391                             video\.passageenseine\.fr|
392                             videos\.festivalparminous\.org|
393                             peertube\.touhoppai\.moe|
394                             sikke\.fi|
395                             peer\.hostux\.social|
396                             share\.tube|
397                             peertube\.walkingmountains\.fr|
398                             videos\.benpro\.fr|
399                             peertube\.parleur\.net|
400                             peertube\.heraut\.eu|
401                             tube\.aquilenet\.fr|
402                             peertube\.gegeweb\.eu|
403                             framatube\.org|
404                             thinkerview\.video|
405                             tube\.conferences-gesticulees\.net|
406                             peertube\.datagueule\.tv|
407                             video\.lqdn\.fr|
408                             tube\.mochi\.academy|
409                             media\.zat\.im|
410                             video\.colibris-outilslibres\.org|
411                             tube\.svnet\.fr|
412                             peertube\.video|
413                             peertube3\.cpy\.re|
414                             peertube2\.cpy\.re|
415                             videos\.tcit\.fr|
416                             peertube\.cpy\.re|
417                             canard\.tube
418                         )'''
    # Textual UUID: 8-4-4-4-12 groups of hex digits, either letter case.
    _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
    # API URL template, filled with (host, video id, sub-path) by _call_api.
    _API_BASE = 'https://%s/api/v1/videos/%s/%s'
    # Two accepted URL forms:
    #   * "peertube:<host>:<uuid>" -- host captured in group "host";
    #   * an http(s) watch/embed/API URL on one of the hosts listed in
    #     _INSTANCES_RE -- host captured in group "host_2".
    # In both cases the video UUID is captured in group "id".
    _VALID_URL = r'''(?x)
                    (?:
                        peertube:(?P<host>[^:]+):|
                        https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos)/
                    )
                    (?P<id>%s)
                    ''' % (_INSTANCES_RE, _UUID_RE)
428     _TESTS = [{
429         'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
430         'md5': '9bed8c0137913e17b86334e5885aacff',
431         'info_dict': {
432             'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
433             'ext': 'mp4',
434             'title': 'What is PeerTube?',
435             'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10',
436             'thumbnail': r're:https?://.*\.(?:jpg|png)',
437             'timestamp': 1538391166,
438             'upload_date': '20181001',
439             'uploader': 'Framasoft',
440             'uploader_id': '3',
441             'uploader_url': 'https://framatube.org/accounts/framasoft',
442             'channel': 'Les vidéos de Framasoft',
443             'channel_id': '2',
444             'channel_url': 'https://framatube.org/video-channels/bf54d359-cfad-4935-9d45-9d6be93f63e8',
445             'language': 'en',
446             'license': 'Attribution - Share Alike',
447             'duration': 113,
448             'view_count': int,
449             'like_count': int,
450             'dislike_count': int,
451             'tags': ['framasoft', 'peertube'],
452             'categories': ['Science & Technology'],
453         }
454     }, {
455         # Issue #26002
456         'url': 'peertube:spacepub.space:d8943b2d-8280-497b-85ec-bc282ec2afdc',
457         'info_dict': {
458             'id': 'd8943b2d-8280-497b-85ec-bc282ec2afdc',
459             'ext': 'mp4',
460             'title': 'Dot matrix printer shell demo',
461             'uploader_id': '3',
462             'timestamp': 1587401293,
463             'upload_date': '20200420',
464             'uploader': 'Drew DeVault',
465         }
466     }, {
467         'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
468         'only_matching': True,
469     }, {
470         # nsfw
471         'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
472         'only_matching': True,
473     }, {
474         'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
475         'only_matching': True,
476     }, {
477         'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
478         'only_matching': True,
479     }, {
480         'url': 'peertube:video.blender.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
481         'only_matching': True,
482     }]
483
484     @staticmethod
485     def _extract_peertube_url(webpage, source_url):
486         mobj = re.match(
487             r'https?://(?P<host>[^/]+)/videos/(?:watch|embed)/(?P<id>%s)'
488             % PeerTubeIE._UUID_RE, source_url)
489         if mobj and any(p in webpage for p in (
490                 '<title>PeerTube<',
491                 'There will be other non JS-based clients to access PeerTube',
492                 '>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
493             return 'peertube:%s:%s' % mobj.group('host', 'id')
494
495     @staticmethod
496     def _extract_urls(webpage, source_url):
497         entries = re.findall(
498             r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)'''
499             % (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage)
500         if not entries:
501             peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url)
502             if peertube_url:
503                 entries = [peertube_url]
504         return entries
505
506     def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True):
507         return self._download_json(
508             self._API_BASE % (host, video_id, path), video_id,
509             note=note, errnote=errnote, fatal=fatal)
510
511     def _get_subtitles(self, host, video_id):
512         captions = self._call_api(
513             host, video_id, 'captions', note='Downloading captions JSON',
514             fatal=False)
515         if not isinstance(captions, dict):
516             return
517         data = captions.get('data')
518         if not isinstance(data, list):
519             return
520         subtitles = {}
521         for e in data:
522             language_id = try_get(e, lambda x: x['language']['id'], compat_str)
523             caption_url = urljoin('https://%s' % host, e.get('captionPath'))
524             if not caption_url:
525                 continue
526             subtitles.setdefault(language_id or 'en', []).append({
527                 'url': caption_url,
528             })
529         return subtitles
530
531     def _real_extract(self, url):
532         mobj = re.match(self._VALID_URL, url)
533         host = mobj.group('host') or mobj.group('host_2')
534         video_id = mobj.group('id')
535
536         video = self._call_api(
537             host, video_id, '', note='Downloading video JSON')
538
539         title = video['name']
540
541         formats = []
542         files = video.get('files') or []
543         for playlist in (video.get('streamingPlaylists') or []):
544             if not isinstance(playlist, dict):
545                 continue
546             playlist_files = playlist.get('files')
547             if not (playlist_files and isinstance(playlist_files, list)):
548                 continue
549             files.extend(playlist_files)
550         for file_ in files:
551             if not isinstance(file_, dict):
552                 continue
553             file_url = url_or_none(file_.get('fileUrl'))
554             if not file_url:
555                 continue
556             file_size = int_or_none(file_.get('size'))
557             format_id = try_get(
558                 file_, lambda x: x['resolution']['label'], compat_str)
559             f = parse_resolution(format_id)
560             f.update({
561                 'url': file_url,
562                 'format_id': format_id,
563                 'filesize': file_size,
564             })
565             if format_id == '0p':
566                 f['vcodec'] = 'none'
567             else:
568                 f['fps'] = int_or_none(file_.get('fps'))
569             formats.append(f)
570         self._sort_formats(formats)
571
572         full_description = self._call_api(
573             host, video_id, 'description', note='Downloading description JSON',
574             fatal=False)
575
576         description = None
577         if isinstance(full_description, dict):
578             description = str_or_none(full_description.get('description'))
579         if not description:
580             description = video.get('description')
581
582         subtitles = self.extract_subtitles(host, video_id)
583
584         def data(section, field, type_):
585             return try_get(video, lambda x: x[section][field], type_)
586
587         def account_data(field, type_):
588             return data('account', field, type_)
589
590         def channel_data(field, type_):
591             return data('channel', field, type_)
592
593         category = data('category', 'label', compat_str)
594         categories = [category] if category else None
595
596         nsfw = video.get('nsfw')
597         if nsfw is bool:
598             age_limit = 18 if nsfw else 0
599         else:
600             age_limit = None
601
602         webpage_url = 'https://%s/videos/watch/%s' % (host, video_id)
603
604         return {
605             'id': video_id,
606             'title': title,
607             'description': description,
608             'thumbnail': urljoin(webpage_url, video.get('thumbnailPath')),
609             'timestamp': unified_timestamp(video.get('publishedAt')),
610             'uploader': account_data('displayName', compat_str),
611             'uploader_id': str_or_none(account_data('id', int)),
612             'uploader_url': url_or_none(account_data('url', compat_str)),
613             'channel': channel_data('displayName', compat_str),
614             'channel_id': str_or_none(channel_data('id', int)),
615             'channel_url': url_or_none(channel_data('url', compat_str)),
616             'language': data('language', 'id', compat_str),
617             'license': data('licence', 'label', compat_str),
618             'duration': int_or_none(video.get('duration')),
619             'view_count': int_or_none(video.get('views')),
620             'like_count': int_or_none(video.get('likes')),
621             'dislike_count': int_or_none(video.get('dislikes')),
622             'age_limit': age_limit,
623             'tags': try_get(video, lambda x: x['tags'], list),
624             'categories': categories,
625             'formats': formats,
626             'subtitles': subtitles,
627             'webpage_url': webpage_url,
628         }