[tunein] Ignore reliability if it's >90% (#4097)
[ytdl] / youtube_dl / extractor / tunein.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import json
5 import re
6
7 from .common import InfoExtractor
8 from ..utils import ExtractorError
9
10
class TuneInIE(InfoExtractor):
    """Extractor for TuneIn stations.

    Handles ``tunein.com/radio/...-s<id>`` and
    ``tunein.com/station/...StationId=<id>`` URLs directly, and resolves
    ``tun.in/<code>`` short links by following their HTTP redirect first.
    """

    _VALID_URL = r'''(?x)https?://(?:www\.)?
    (?:
        tunein\.com/
        (?:
            radio/.*?-s|
            station/.*?StationId\=
        )(?P<id>[0-9]+)
        |tun\.in/(?P<redirect_id>[A-Za-z0-9]+)
    )
    '''
    _API_URL_TEMPLATE = 'http://tunein.com/tuner/tune/?stationId={0:}&tuneType=Station'

    # Reliability (in percent) above which streams are considered equally
    # trustworthy; only streams at or below it are ranked down.
    _RELIABILITY_THRESHOLD = 90

    _INFO_DICT = {
        'id': '34682',
        'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
        'ext': 'aac',
        # Raw string: '\.' is a regex escape, not a string escape
        'thumbnail': r're:^https?://.*\.png$',
        'location': 'Tacoma, WA',
    }
    _TESTS = [
        {
            'url': 'http://tunein.com/radio/Jazz24-885-s34682/',
            'info_dict': _INFO_DICT,
            'params': {
                'skip_download': True,  # live stream
            },
        },
        {  # test redirection
            'url': 'http://tun.in/ser7s',
            'info_dict': _INFO_DICT,
            'params': {
                'skip_download': True,  # live stream
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the station referenced by *url*.

        Steps: resolve a ``tun.in`` short link if needed, download the
        station JSON from ``_API_URL_TEMPLATE``, download the stream list
        from the station's ``StreamUrl`` and turn every stream that has a
        URL into a format entry.

        Returns a dict with ``id``, ``title``, ``formats``, ``thumbnail``,
        ``location`` and ``is_live`` (``True`` if any stream is of type
        ``Live``, otherwise ``None``).

        Raises ExtractorError if no station id can be derived from the
        (possibly redirected) URL or if the station has no ``StreamUrl``.
        """
        mobj = re.match(self._VALID_URL, url)
        redirect_id = mobj.group('redirect_id')
        if redirect_id:
            # The server doesn't support HEAD requests
            urlh = self._request_webpage(
                url, redirect_id, note='Downloading redirect page')
            url = urlh.geturl()
            self.to_screen('Following redirect: %s' % url)
            mobj = re.match(self._VALID_URL, url)
        # The redirect target may not match at all, or may match the short
        # link alternative again, in which case there is no 'id' group.
        station_id = mobj.group('id') if mobj else None
        if not station_id:
            raise ExtractorError(
                'Unable to determine station id from %s' % url)

        station_info = self._download_json(
            self._API_URL_TEMPLATE.format(station_id),
            station_id, note='Downloading station JSON')

        title = station_info['Title']
        thumbnail = station_info.get('Logo')
        location = station_info.get('Location')
        streams_url = station_info.get('StreamUrl')
        if not streams_url:
            raise ExtractorError('No downloadable streams found',
                                 expected=True)
        stream_data = self._download_webpage(
            streams_url, station_id, note='Downloading stream data')
        # The stream list is wrapped in a call-like "(...);" envelope; pull
        # out the parenthesised JSON payload before decoding it.
        streams = json.loads(self._search_regex(
            r'\((.*)\);', stream_data, 'stream info'))['Streams']

        is_live = None
        formats = []
        for stream in streams:
            if stream.get('Type') == 'Live':
                is_live = True

            stream_url = stream.get('Url')
            if not stream_url:
                # An entry without a URL cannot be downloaded
                continue

            reliability = stream.get('Reliability')
            format_note = (
                'Reliability: %d%%' % reliability
                if reliability is not None else None)

            # Unknown or high reliability is neutral, so those streams are
            # ordered by their other properties. Streams at or below the
            # threshold are ranked *below* them (higher preference wins
            # when formats are sorted), not above.
            if (reliability is None
                    or reliability > self._RELIABILITY_THRESHOLD):
                preference = 0
            else:
                preference = -1

            media_type = stream.get('MediaType')
            fmt = {
                'preference': preference,
                'abr': stream.get('Bandwidth'),
                'acodec': media_type,
                'vcodec': 'none',
                'url': stream_url,
                'source_preference': reliability,
                'format_note': format_note,
            }
            if media_type:
                fmt['ext'] = media_type.lower()
            # else: 'ext' is left unset rather than crashing on None;
            # presumably the framework infers it from the URL - TODO confirm
            formats.append(fmt)
        self._sort_formats(formats)

        return {
            'id': station_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'location': location,
            'is_live': is_live,
        }
106         }