Browse Source

Merge 'ytdl-org/youtube-dl/master' release 2020.11.19

Old Extractors left behind:
	VLivePlaylistIE
	YoutubeSearchURLIE
	YoutubeShowIE
	YoutubeFavouritesIE

If removing old extractors, make corresponding changes in
	docs/supportedsites.md
	youtube_dlc/extractor/extractors.py

Not merged:
	.github/ISSUE_TEMPLATE/1_broken_site.md
	.github/ISSUE_TEMPLATE/2_site_support_request.md
	.github/ISSUE_TEMPLATE/3_site_feature_request.md
	.github/ISSUE_TEMPLATE/4_bug_report.md
	.github/ISSUE_TEMPLATE/5_feature_request.md
	test/test_all_urls.py
	youtube_dlc/version.py
	Changelog
pull/245/head
pukkandan 1 year ago
parent
commit
8bdd16b499
  1. 2
      devscripts/make_lazy_extractors.py
  2. 14
      docs/supportedsites.md
  3. 26
      test/test_all_urls.py
  4. 28
      test/test_utils.py
  5. 2
      youtube_dlc/extractor/afreecatv.py
  6. 167
      youtube_dlc/extractor/arte.py
  7. 268
      youtube_dlc/extractor/bandcamp.py
  8. 19
      youtube_dlc/extractor/cnbc.py
  9. 5
      youtube_dlc/extractor/common.py
  10. 27
      youtube_dlc/extractor/condenast.py
  11. 11
      youtube_dlc/extractor/extractors.py
  12. 47
      youtube_dlc/extractor/francetv.py
  13. 9
      youtube_dlc/extractor/generic.py
  14. 2
      youtube_dlc/extractor/iqiyi.py
  15. 88
      youtube_dlc/extractor/lbry.py
  16. 91
      youtube_dlc/extractor/lrt.py
  17. 60
      youtube_dlc/extractor/malltv.py
  18. 10
      youtube_dlc/extractor/mgtv.py
  19. 12
      youtube_dlc/extractor/mtv.py
  20. 5
      youtube_dlc/extractor/nbc.py
  21. 38
      youtube_dlc/extractor/ndr.py
  22. 157
      youtube_dlc/extractor/rai.py
  23. 111
      youtube_dlc/extractor/servus.py
  24. 153
      youtube_dlc/extractor/spiegel.py
  25. 11
      youtube_dlc/extractor/twentythreevideo.py
  26. 77
      youtube_dlc/extractor/urplay.py
  27. 82
      youtube_dlc/extractor/usanetwork.py
  28. 7
      youtube_dlc/extractor/ustream.py
  29. 11
      youtube_dlc/extractor/vimeo.py
  30. 285
      youtube_dlc/extractor/vlive.py
  31. 31
      youtube_dlc/extractor/xtube.py
  32. 7
      youtube_dlc/extractor/youporn.py
  33. 1643
      youtube_dlc/extractor/youtube.py
  34. 17
      youtube_dlc/utils.py

2
devscripts/make_lazy_extractors.py

@ -61,7 +61,7 @@ def build_lazy_ie(ie, name):
return s
# find the correct sorting and add the required base classes so that sublcasses
# find the correct sorting and add the required base classes so that subclasses
# can be correctly created
classes = _ALL_CLASSES[:-1]
ordered_cls = []

14
docs/supportedsites.md

@ -59,9 +59,9 @@
- **ARD:mediathek**
- **ARDBetaMediathek**
- **Arkena**
- **arte.tv:+7**
- **arte.tv:embed**
- **arte.tv:playlist**
- **ArteTV**
- **ArteTVEmbed**
- **ArteTVPlaylist**
- **AsianCrush**
- **AsianCrushPlaylist**
- **AtresPlayer**
@ -424,6 +424,7 @@
- **la7.it**
- **laola1tv**
- **laola1tv:embed**
- **lbry.tv**
- **LCI**
- **Lcp**
- **LcpPlay**
@ -835,8 +836,6 @@
- **SpankBangPlaylist**
- **Spankwire**
- **Spiegel**
- **Spiegel:Article**: Articles on spiegel.de
- **Spiegeltv**
- **sport.francetvinfo.fr**
- **Sport5**
- **SportBox**
@ -1147,19 +1146,18 @@
- **YourPorn**
- **YourUpload**
- **youtube**: YouTube.com
- **youtube:channel**: YouTube.com channels
- **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
- **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
- **youtube:live**: YouTube.com live streams
- **youtube:playlist**: YouTube.com playlists
- **youtube:playlists**: YouTube.com user/channel playlists
- **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
- **youtube:search**: YouTube.com searches
- **youtube:search:date**: YouTube.com searches, newest videos first
- **youtube:search_url**: YouTube.com search URLs
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- **YoutubeYtUser**: YouTube.com user videos (URL or "ytuser" keyword)
- **youtube:tab**: YouTube.com tab
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
- **Zapiks**
- **Zaq1**

26
test/test_all_urls.py

@ -31,15 +31,17 @@ class TestAllURLsMatching(unittest.TestCase):
def test_youtube_playlist_matching(self):
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
assertTab = lambda url: self.assertMatch(url, ['youtube:tab'])
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
assertPlaylist('PL63F0C78739B09958')
assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
# Top tracks
assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
assertTab('https://www.youtube.com/playlist?list=MCUS.20142101')
def test_youtube_matching(self):
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
@ -50,26 +52,22 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
def test_youtube_channel_matching(self):
assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
assertChannel = lambda url: self.assertMatch(url, ['youtube:tab'])
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
def test_youtube_user_matching(self):
self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
# def test_youtube_user_matching(self):
# self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])
def test_youtube_feeds(self):
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
def test_youtube_show_matching(self):
self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
def test_youtube_search_matching(self):
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
# def test_youtube_search_matching(self):
# self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
# self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
def test_youtube_extract(self):
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)

28
test/test_utils.py

@ -937,6 +937,28 @@ class TestUtil(unittest.TestCase):
self.assertEqual(d['x'], 1)
self.assertEqual(d['y'], 'a')
# Just drop ! prefix for now though this results in a wrong value
on = js_to_json('''{
a: !0,
b: !1,
c: !!0,
d: !!42.42,
e: !!![],
f: !"abc",
g: !"",
!42: 42
}''')
self.assertEqual(json.loads(on), {
'a': 0,
'b': 1,
'c': 0,
'd': 42.42,
'e': [],
'f': "abc",
'g': "",
'42': 42
})
on = js_to_json('["abc", "def",]')
self.assertEqual(json.loads(on), ['abc', 'def'])
@ -994,6 +1016,12 @@ class TestUtil(unittest.TestCase):
on = js_to_json('{42:4.2e1}')
self.assertEqual(json.loads(on), {'42': 42.0})
on = js_to_json('{ "0x40": "0x40" }')
self.assertEqual(json.loads(on), {'0x40': '0x40'})
on = js_to_json('{ "040": "040" }')
self.assertEqual(json.loads(on), {'040': '040'})
def test_js_to_json_malformed(self):
self.assertEqual(js_to_json('42a1'), '42"a1"')
self.assertEqual(js_to_json('42a-1'), '42"a"-1')

2
youtube_dlc/extractor/afreecatv.py

@ -275,7 +275,7 @@ class AfreecaTVIE(InfoExtractor):
video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
if video_element is None or video_element.text is None:
raise ExtractorError(
'Video %s video does not exist' % video_id, expected=True)
'Video %s does not exist' % video_id, expected=True)
video_url = video_element.text.strip()

167
youtube_dlc/extractor/arte.py

@ -4,23 +4,57 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..compat import (
compat_str,
compat_urlparse,
)
from ..utils import (
ExtractorError,
int_or_none,
qualities,
try_get,
unified_strdate,
url_or_none,
)
# There are different sources of video in arte.tv, the extraction process
# is different for each one. The videos usually expire in 7 days, so we can't
# add tests.
class ArteTVBaseIE(InfoExtractor):
def _extract_from_json_url(self, json_url, video_id, lang, title=None):
info = self._download_json(json_url, video_id)
_ARTE_LANGUAGES = 'fr|de|en|es|it|pl'
_API_BASE = 'https://api.arte.tv/api/player/v1'
class ArteTVIE(ArteTVBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:
(?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
)
/(?P<id>\d{6}-\d{3}-[AF])
''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
_TESTS = [{
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
'info_dict': {
'id': '088501-000-A',
'ext': 'mp4',
'title': 'Mexico: Stealing Petrol to Survive',
'upload_date': '20190628',
},
}, {
'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
'only_matching': True,
}, {
'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
lang = mobj.group('lang') or mobj.group('lang_2')
info = self._download_json(
'%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id)
player_info = info['videoJsonPlayer']
vsr = try_get(player_info, lambda x: x['VSR'], dict)
@ -37,18 +71,11 @@ class ArteTVBaseIE(InfoExtractor):
if not upload_date_str:
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
title = (player_info.get('VTI') or title or player_info['VID']).strip()
title = (player_info.get('VTI') or player_info['VID']).strip()
subtitle = player_info.get('VSU', '').strip()
if subtitle:
title += ' - %s' % subtitle
info_dict = {
'id': player_info['VID'],
'title': title,
'description': player_info.get('VDE'),
'upload_date': unified_strdate(upload_date_str),
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
}
qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
LANGS = {
@ -65,6 +92,10 @@ class ArteTVBaseIE(InfoExtractor):
formats = []
for format_id, format_dict in vsr.items():
f = dict(format_dict)
format_url = url_or_none(f.get('url'))
streamer = f.get('streamer')
if not format_url and not streamer:
continue
versionCode = f.get('versionCode')
l = re.escape(langcode)
@ -107,6 +138,16 @@ class ArteTVBaseIE(InfoExtractor):
else:
lang_pref = -1
media_type = f.get('mediaType')
if media_type == 'hls':
m3u8_formats = self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False)
for m3u8_format in m3u8_formats:
m3u8_format['language_preference'] = lang_pref
formats.extend(m3u8_formats)
continue
format = {
'format_id': format_id,
'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
@ -118,7 +159,7 @@ class ArteTVBaseIE(InfoExtractor):
'quality': qfunc(f.get('quality')),
}
if f.get('mediaType') == 'rtmp':
if media_type == 'rtmp':
format['url'] = f['streamer']
format['play_path'] = 'mp4:' + f['url']
format['ext'] = 'flv'
@ -127,56 +168,50 @@ class ArteTVBaseIE(InfoExtractor):
formats.append(format)
self._check_formats(formats, video_id)
self._sort_formats(formats)
info_dict['formats'] = formats
return info_dict
return {
'id': player_info.get('VID') or video_id,
'title': title,
'description': player_info.get('VDE'),
'upload_date': unified_strdate(upload_date_str),
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
'formats': formats,
}
class ArteTVPlus7IE(ArteTVBaseIE):
IE_NAME = 'arte.tv:+7'
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>\d{6}-\d{3}-[AF])'
class ArteTVEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
_TESTS = [{
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
'info_dict': {
'id': '088501-000-A',
'id': '100605-013-A',
'ext': 'mp4',
'title': 'Mexico: Stealing Petrol to Survive',
'upload_date': '20190628',
'title': 'United we Stream November Lockdown Edition #13',
'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
'upload_date': '20201116',
},
}, {
'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
'only_matching': True,
}]
def _real_extract(self, url):
lang, video_id = re.match(self._VALID_URL, url).groups()
return self._extract_from_json_url(
'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id),
video_id, lang)
class ArteTVEmbedIE(ArteTVPlus7IE):
IE_NAME = 'arte.tv:embed'
_VALID_URL = r'''(?x)
https://www\.arte\.tv
/player/v3/index\.php\?json_url=
(?P<json_url>
https?://api\.arte\.tv/api/player/v1/config/
(?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF])
)
'''
_TESTS = []
@staticmethod
def _extract_urls(webpage):
return [url for _, url in re.findall(
r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1',
webpage)]
def _real_extract(self, url):
json_url, lang, video_id = re.match(self._VALID_URL, url).groups()
return self._extract_from_json_url(json_url, video_id, lang)
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
json_url = qs['json_url'][0]
video_id = ArteTVIE._match_id(json_url)
return self.url_result(
json_url, ie=ArteTVIE.ie_key(), video_id=video_id)
class ArteTVPlaylistIE(ArteTVBaseIE):
IE_NAME = 'arte.tv:playlist'
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})'
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
_TESTS = [{
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
'info_dict': {
@ -185,17 +220,35 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
},
'playlist_mincount': 6,
}, {
'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/',
'only_matching': True,
}]
def _real_extract(self, url):
lang, playlist_id = re.match(self._VALID_URL, url).groups()
collection = self._download_json(
'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
% (lang, playlist_id), playlist_id)
'%s/collectionData/%s/%s?source=videos'
% (self._API_BASE, lang, playlist_id), playlist_id)
entries = []
for video in collection['videos']:
if not isinstance(video, dict):
continue
video_url = url_or_none(video.get('url')) or url_or_none(video.get('jsonUrl'))
if not video_url:
continue
video_id = video.get('programId')
entries.append({
'_type': 'url_transparent',
'url': video_url,
'id': video_id,
'title': video.get('title'),
'alt_title': video.get('subtitle'),
'thumbnail': url_or_none(try_get(video, lambda x: x['mainImage']['url'], compat_str)),
'duration': int_or_none(video.get('durationSeconds')),
'view_count': int_or_none(video.get('views')),
'ie_key': ArteTVIE.ie_key(),
})
title = collection.get('title')
description = collection.get('shortDescription') or collection.get('teaserText')
entries = [
self._extract_from_json_url(
video['jsonUrl'], video.get('programId') or playlist_id, lang)
for video in collection['videos'] if video.get('jsonUrl')]
return self.playlist_result(entries, playlist_id, title, description)

268
youtube_dlc/extractor/bandcamp.py

@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals
import random
@ -5,10 +6,7 @@ import re
import time
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urlparse,
)
from ..compat import compat_str
from ..utils import (
ExtractorError,
float_or_none,
@ -17,71 +15,32 @@ from ..utils import (
parse_filesize,
str_or_none,
try_get,
unescapeHTML,
update_url_query,
unified_strdate,
unified_timestamp,
url_or_none,
urljoin,
)
class BandcampBaseIE(InfoExtractor):
"""Provide base functions for Bandcamp extractors"""
def _extract_json_from_html_data_attribute(self, webpage, suffix, video_id):
json_string = self._html_search_regex(
r' data-%s="([^"]*)' % suffix,
webpage, '%s json' % suffix, default='{}')
return self._parse_json(json_string, video_id)
def _parse_json_track(self, json):
formats = []
file_ = json.get('file')
if isinstance(file_, dict):
for format_id, format_url in file_.items():
if not url_or_none(format_url):
continue
ext, abr_str = format_id.split('-', 1)
formats.append({
'format_id': format_id,
'url': self._proto_relative_url(format_url, 'http:'),
'ext': ext,
'vcodec': 'none',
'acodec': ext,
'abr': int_or_none(abr_str),
})
return {
'duration': float_or_none(json.get('duration')),
'id': str_or_none(json.get('track_id') or json.get('id')),
'title': json.get('title'),
'title_link': json.get('title_link'),
'number': int_or_none(json.get('track_num')),
'formats': formats
}
class BandcampIE(BandcampBaseIE):
IE_NAME = "Bandcamp:track"
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
class BandcampIE(InfoExtractor):
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
'md5': 'c557841d5e50261777a6585648adf439',
'info_dict': {
'id': '1812978515',
'ext': 'mp3',
'title': "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
'title': "youtube-dl \"'/\\ä↭ - youtube-dl \"'/\\ä↭ - youtube-dl test song \"'/\\ä↭",
'duration': 9.8485,
'uploader': "youtube-dl \"'/\\\u00e4\u21ad",
'timestamp': 1354224127,
'uploader': 'youtube-dl "\'/\\ä↭',
'upload_date': '20121129',
'timestamp': 1354224127,
},
'_skip': 'There is a limit of 200 free downloads / month for the test song'
}, {
# free download
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
'md5': '5d92af55811e47f38962a54c30b07ef0',
'info_dict': {
'id': '2650410135',
'ext': 'aiff',
@ -120,52 +79,59 @@ class BandcampIE(BandcampBaseIE):
},
}]
def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True):
return self._parse_json(self._html_search_regex(
r'data-%s=(["\'])({.+?})\1' % attr, webpage,
attr + ' data', group=2), video_id, fatal=fatal)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
title = mobj.group('title')
url_track_title = title
title = self._match_id(url)
webpage = self._download_webpage(url, title)
thumbnail = self._html_search_meta('og:image', webpage, default=None)
json_tralbum = self._extract_json_from_html_data_attribute(webpage, "tralbum", url_track_title)
json_embed = self._extract_json_from_html_data_attribute(webpage, "embed", url_track_title)
json_tracks = json_tralbum.get('trackinfo')
if not json_tracks:
raise ExtractorError('Could not extract track')
track = self._parse_json_track(json_tracks[0])
artist = json_tralbum.get('artist')
album_title = json_embed.get('album_title')
json_album = json_tralbum.get('packages')
if json_album:
json_album = json_album[0]
album_publish_date = json_album.get('album_publish_date')
album_release_date = json_album.get('album_release_date')
else:
album_publish_date = None
album_release_date = json_tralbum.get('album_release_date')
timestamp = unified_timestamp(json_tralbum.get('current', {}).get('publish_date') or album_publish_date)
release_date = unified_strdate(album_release_date)
download_link = self._search_regex(
r'freeDownloadPage(?:["\']|&quot;):\s*(["\']|&quot;)(?P<url>(?:(?!\1).)+)\1', webpage,
'download link', default=None, group='url')
tralbum = self._extract_data_attr(webpage, title)
thumbnail = self._og_search_thumbnail(webpage)
track_id = None
track = None
track_number = None
duration = None
formats = []
track_info = try_get(tralbum, lambda x: x['trackinfo'][0], dict)
if track_info:
file_ = track_info.get('file')
if isinstance(file_, dict):
for format_id, format_url in file_.items():
if not url_or_none(format_url):
continue
ext, abr_str = format_id.split('-', 1)
formats.append({
'format_id': format_id,
'url': self._proto_relative_url(format_url, 'http:'),
'ext': ext,
'vcodec': 'none',
'acodec': ext,
'abr': int_or_none(abr_str),
})
track = track_info.get('title')
track_id = str_or_none(
track_info.get('track_id') or track_info.get('id'))
track_number = int_or_none(track_info.get('track_num'))
duration = float_or_none(track_info.get('duration'))
embed = self._extract_data_attr(webpage, title, 'embed', False)
current = tralbum.get('current') or {}
artist = embed.get('artist') or current.get('artist') or tralbum.get('artist')
timestamp = unified_timestamp(
current.get('publish_date') or tralbum.get('album_publish_date'))
download_link = tralbum.get('freeDownloadPage')
if download_link:
track_id = self._search_regex(
r'\?id=(?P<id>\d+)&',
download_link, 'track id')
track_id = compat_str(tralbum['id'])
download_webpage = self._download_webpage(
download_link, track_id, 'Downloading free downloads page')
blob = self._parse_json(
self._search_regex(
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
'blob', group='blob'),
track_id, transform_source=unescapeHTML)
blob = self._extract_data_attr(download_webpage, track_id, 'blob')
info = try_get(
blob, (lambda x: x['digital_items'][0],
@ -173,6 +139,8 @@ class BandcampIE(BandcampBaseIE):
if info:
downloads = info.get('downloads')
if isinstance(downloads, dict):
if not track:
track = info.get('title')
if not artist:
artist = info.get('artist')
if not thumbnail:
@ -206,7 +174,7 @@ class BandcampIE(BandcampBaseIE):
retry_url = url_or_none(stat.get('retry_url'))
if not retry_url:
continue
track['formats'].append({
formats.append({
'url': self._proto_relative_url(retry_url, 'http:'),
'ext': download_formats.get(format_id),
'format_id': format_id,
@ -215,30 +183,34 @@ class BandcampIE(BandcampBaseIE):
'vcodec': 'none',
})
self._sort_formats(track['formats'])
self._sort_formats(formats)
title = '%s - %s' % (artist, track.get('title')) if artist else track.get('title')
title = '%s - %s' % (artist, track) if artist else track
if not duration:
duration = float_or_none(self._html_search_meta(
'duration', webpage, default=None))
return {
'album': album_title,
'artist': artist,
'duration': track['duration'],
'formats': track['formats'],
'id': track['id'],
'release_date': release_date,
'id': track_id,
'title': title,
'thumbnail': thumbnail,
'uploader': artist,
'timestamp': timestamp,
'title': title,
'track': track['title'],
'track_id': track['id'],
'track_number': track['number'],
'uploader': artist
'release_date': unified_strdate(tralbum.get('album_release_date')),
'duration': duration,
'track': track,
'track_number': track_number,
'track_id': track_id,
'artist': artist,
'album': embed.get('album_title'),
'formats': formats,
}
class BandcampAlbumIE(BandcampBaseIE):
class BandcampAlbumIE(BandcampIE):
IE_NAME = 'Bandcamp:album'
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<id>[^/?#&]+))?'
_TESTS = [{
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@ -248,7 +220,10 @@ class BandcampAlbumIE(BandcampBaseIE):
'info_dict': {
'id': '1353101989',
'ext': 'mp3',
'title': 'Intro',
'title': 'Blazo - Intro',
'timestamp': 1311756226,
'upload_date': '20110727',
'uploader': 'Blazo',
}
},
{
@ -256,7 +231,10 @@ class BandcampAlbumIE(BandcampBaseIE):
'info_dict': {
'id': '38097443',
'ext': 'mp3',
'title': 'Kero One - Keep It Alive (Blazo remix)',
'title': 'Blazo - Kero One - Keep It Alive (Blazo remix)',
'timestamp': 1311757238,
'upload_date': '20110727',
'uploader': 'Blazo',
}
},
],
@ -292,6 +270,7 @@ class BandcampAlbumIE(BandcampBaseIE):
'title': '"Entropy" EP',
'uploader_id': 'jstrecords',
'id': 'entropy-ep',
'description': 'md5:0ff22959c943622972596062f2f366a5',
},
'playlist_mincount': 3,
}, {
@ -301,6 +280,7 @@ class BandcampAlbumIE(BandcampBaseIE):
'id': 'we-are-the-plague',
'title': 'WE ARE THE PLAGUE',
'uploader_id': 'insulters',
'description': 'md5:b3cf845ee41b2b1141dc7bde9237255f',
},
'playlist_count': 2,
}]
@ -312,41 +292,34 @@ class BandcampAlbumIE(BandcampBaseIE):
else super(BandcampAlbumIE, cls).suitable(url))
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
uploader_id = mobj.group('subdomain')
album_id = mobj.group('album_id')
uploader_id, album_id = re.match(self._VALID_URL, url).groups()
playlist_id = album_id or uploader_id
webpage = self._download_webpage(url, playlist_id)
json_tralbum = self._extract_json_from_html_data_attribute(webpage, "tralbum", playlist_id)
json_embed = self._extract_json_from_html_data_attribute(webpage, "embed", playlist_id)
json_tracks = json_tralbum.get('trackinfo')
if not json_tracks:
raise ExtractorError('Could not extract album tracks')
album_title = json_embed.get('album_title')
tralbum = self._extract_data_attr(webpage, playlist_id)
track_info = tralbum.get('trackinfo')
if not track_info:
raise ExtractorError('The page doesn\'t contain any tracks')
# Only tracks with duration info have songs
tracks = [self._parse_json_track(track) for track in json_tracks]
entries = [
self.url_result(
compat_urlparse.urljoin(url, track['title_link']),
ie=BandcampIE.ie_key(), video_id=track['id'],
video_title=track['title'])
for track in tracks
if track.get('duration')]
urljoin(url, t['title_link']), BandcampIE.ie_key(),
str_or_none(t.get('track_id') or t.get('id')), t.get('title'))
for t in track_info
if t.get('duration')]
current = tralbum.get('current') or {}
return {
'_type': 'playlist',
'uploader_id': uploader_id,
'id': playlist_id,
'title': album_title,
'entries': entries
'title': current.get('title'),
'description': current.get('about'),
'entries': entries,
}
class BandcampWeeklyIE(InfoExtractor):
class BandcampWeeklyIE(BandcampIE):
IE_NAME = 'Bandcamp:weekly'
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
_TESTS = [{
@ -361,29 +334,23 @@ class BandcampWeeklyIE(InfoExtractor):
'release_date': '20170404',
'series': 'Bandcamp Weekly',
'episode': 'Magic Moments',
'episode_number': 208,
'episode_id': '224',
}
},
'params': {
'format': 'opus-lo',
},
}, {
'url': 'https://bandcamp.com/?blah/blah@&show=228',
'only_matching': True
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
blob = self._parse_json(
self._search_regex(
r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
'blob', group='blob'),
video_id, transform_source=unescapeHTML)
show_id = self._match_id(url)
webpage = self._download_webpage(url, show_id)
show = blob['bcw_show']
blob = self._extract_data_attr(webpage, show_id, 'blob')
# This is desired because any invalid show id redirects to `bandcamp.com`
# which happens to expose the latest Bandcamp Weekly episode.
show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)
show = blob['bcw_data'][show_id]
formats = []
for format_id, format_url in show['audio_stream'].items():
@ -408,20 +375,8 @@ class BandcampWeeklyIE(InfoExtractor):
if subtitle:
title += ' - %s' % subtitle
episode_number = None
seq = blob.get('bcw_seq')
if seq and isinstance(seq, list):
try:
episode_number = next(
int_or_none(e.get('episode_number'))
for e in seq
if isinstance(e, dict) and int_or_none(e.get('id')) == show_id)
except StopIteration:
pass
return {
'id': video_id,
'id': show_id,
'title': title,
'description': show.get('desc') or show.get('short_desc'),
'duration': float_or_none(show.get('audio_duration')),
@ -429,7 +384,6 @@ class BandcampWeeklyIE(InfoExtractor):
'release_date': unified_strdate(show.get('published_date')),
'series': 'Bandcamp Weekly',
'episode': show.get('subtitle'),
'episode_number': episode_number,
'episode_id': compat_str(video_id),
'episode_id': show_id,
'formats': formats
}

19
youtube_dlc/extractor/cnbc.py

@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import smuggle_url
@ -38,7 +39,7 @@ class CNBCIE(InfoExtractor):
class CNBCVideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)'
_VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)'
_TEST = {
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
'info_dict': {
@ -56,11 +57,15 @@ class CNBCVideoIE(InfoExtractor):
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id,
'video id')
path, display_id = re.match(self._VALID_URL, url).groups()
video_id = self._download_json(
'https://webql-redesign.cnbcfm.com/graphql', display_id, query={
'query': '''{
page(path: "%s") {
vcpsId
}
}''' % path,
})['data']['page']['vcpsId']
return self.url_result(
'http://video.cnbc.com/gallery/?video=%s' % video_id,
'http://video.cnbc.com/gallery/?video=%d' % video_id,
CNBCIE.ie_key())

5
youtube_dlc/extractor/common.py

@ -1456,9 +1456,10 @@ class InfoExtractor(object):
try:
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
return True
except ExtractorError:
except ExtractorError as e:
self.to_screen(
'%s: %s URL is invalid, skipping' % (video_id, item))
'%s: %s URL is invalid, skipping: %s'
% (video_id, item, error_to_compat_str(e.cause)))
return False
def http_scheme(self):

27
youtube_dlc/extractor/condenast.py

@ -16,6 +16,8 @@ from ..utils import (
mimetype2ext,
orderedSet,
parse_iso8601,
strip_or_none,
try_get,
)
@ -82,6 +84,7 @@ class CondeNastIE(InfoExtractor):
'uploader': 'gq',
'upload_date': '20170321',
'timestamp': 1490126427,
'description': 'How much grimmer would things be if these people were competent?',
},
}, {
# JS embed
@ -93,7 +96,7 @@ class CondeNastIE(InfoExtractor):
'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
'uploader': 'arstechnica',
'upload_date': '20150916',
'timestamp': 1442434955,
'timestamp': 1442434920,
}
}, {
'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
@ -196,6 +199,13 @@ class CondeNastIE(InfoExtractor):
})
self._sort_formats(formats)
subtitles = {}
for t, caption in video_info.get('captions', {}).items():
caption_url = caption.get('src')
if not (t in ('vtt', 'srt', 'tml') and caption_url):
continue
subtitles.setdefault('en', []).append({'url': caption_url})
return {
'id': video_id,
'formats': formats,
@ -208,6 +218,7 @@ class CondeNastIE(InfoExtractor):
'season': video_info.get('season_title'),
'timestamp': parse_iso8601(video_info.get('premiere_date')),
'categories': video_info.get('categories'),
'subtitles': subtitles,
}
def _real_extract(self, url):
@ -225,8 +236,16 @@ class CondeNastIE(InfoExtractor):
if url_type == 'series':
return self._extract_series(url, webpage)
else:
params = self._extract_video_params(webpage, display_id)
info = self._search_json_ld(
webpage, display_id, fatal=False)
video = try_get(self._parse_json(self._search_regex(
r'__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
'preload state', '{}'), display_id),
lambda x: x['transformed']['video'])
if video:
params = {'videoId': video['id']}
info = {'description': strip_or_none(video.get('description'))}
else:
params = self._extract_video_params(webpage, display_id)
info = self._search_json_ld(
webpage, display_id, fatal=False)
info.update(self._extract_video(params))
return info

11
youtube_dlc/extractor/extractors.py

@ -62,7 +62,7 @@ from .ard import (
ARDMediathekIE,
)
from .arte import (
ArteTVPlus7IE,
ArteTVIE,
ArteTVEmbedIE,
ArteTVPlaylistIE,
)
@ -542,6 +542,7 @@ from .laola1tv import (
EHFTVIE,
ITTFIE,
)
from .lbry import LBRYIE
from .lci import LCIIE
from .lcp import (
LcpPlayIE,
@ -1079,8 +1080,7 @@ from .spankbang import (
SpankBangPlaylistIE,
)
from .spankwire import SpankwireIE
from .spiegel import SpiegelIE, SpiegelArticleIE
from .spiegeltv import SpiegeltvIE
from .spiegel import SpiegelIE
from .spike import (
BellatorIE,
ParamountNetworkIE,
@ -1505,12 +1505,11 @@ from .yourporn import YourPornIE
from .yourupload import YourUploadIE
from .youtube import (
YoutubeIE,
YoutubeChannelIE,
YoutubeFavouritesIE,
YoutubeHistoryIE,
YoutubeLiveIE,
YoutubeTabIE,
YoutubePlaylistIE,
YoutubePlaylistsIE,
YoutubeRecommendedIE,
YoutubeSearchDateIE,
YoutubeSearchIE,
@ -1519,7 +1518,7 @@ from .youtube import (
YoutubeSubscriptionsIE,
YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE,
YoutubeUserIE,
YoutubeYtUserIE,
YoutubeWatchLaterIE,
)
from .zapiks import ZapiksIE

47
youtube_dlc/extractor/francetv.py

@ -17,6 +17,7 @@ from ..utils import (
parse_duration,
try_get,
url_or_none,
urljoin,
)
from .dailymotion import DailymotionIE
@ -128,18 +129,38 @@ class FranceTVIE(InfoExtractor):
is_live = None
formats = []
for video in info['videos']:
if video['statut'] != 'ONLINE':
videos = []
for video in (info.get('videos') or []):
if video.get('statut') != 'ONLINE':
continue
video_url = video['url']
if not video.get('url'):
continue
videos.append(video)
if not videos:
for device_type in ['desktop', 'mobile']:
fallback_info = self._download_json(
'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
video_id, 'Downloading fallback %s video JSON' % device_type, query={
'device_type': device_type,
'browser': 'chrome',
}, fatal=False)
if fallback_info and fallback_info.get('video'):
videos.append(fallback_info['video'])
formats = []
for video in videos:
video_url = video.get('url')
if not video_url:
continue
if is_live is None:
is_live = (try_get(
video, lambda x: x['plages_ouverture'][0]['direct'],
bool) is True) or '/live.francetv.fr/' in video_url
format_id = video['format']
video, lambda x: x['plages_ouverture'][0]['direct'], bool) is True
or video.get('is_live') is True
or '/live.francetv.fr/' in video_url)
format_id = video.get('format')
ext = determine_ext(video_url)
if ext == 'f4m':
if georestricted:
@ -154,6 +175,9 @@ class FranceTVIE(InfoExtractor):
sign(video_url, format_id), video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id=format_id,
fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False))
elif video_url.startswith('rtmp'):
formats.append({
'url': video_url,
@ -166,6 +190,7 @@ class FranceTVIE(InfoExtractor):
'url': video_url,
'format_id': format_id,
})
self._sort_formats(formats)
title = info['titre']
@ -185,10 +210,10 @@ class FranceTVIE(InfoExtractor):
return {
'id': video_id,
'title': self._live_title(title) if is_live else title,
'description': clean_html(info['synopsis']),
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
'timestamp': int_or_none(info['diffusion']['timestamp']),
'description': clean_html(info.get('synopsis')),
'thumbnail': urljoin('http://pluzz.francetv.fr', info.get('image')),
'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
'is_live': is_live,
'formats': formats,
'subtitles': subtitles,

9
youtube_dlc/extractor/generic.py

@ -91,6 +91,7 @@ from .piksel import PikselIE
from .videa import VideaIE
from .twentymin import TwentyMinutenIE
from .ustream import UstreamIE
from .arte import ArteTVEmbedIE
from .videopress import VideoPressIE
from .rutube import RutubeIE
from .limelight import LimelightBaseIE
@ -2760,11 +2761,9 @@ class GenericIE(InfoExtractor):
return self.url_result(ustream_url, UstreamIE.ie_key())
# Look for embedded arte.tv player
mobj = re.search(
r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
arte_urls = ArteTVEmbedIE._extract_urls(webpage)
if arte_urls:
return self.playlist_from_matches(arte_urls, video_id, video_title)
# Look for embedded francetv player
mobj = re.search(

2
youtube_dlc/extractor/iqiyi.py

@ -150,7 +150,7 @@ class IqiyiSDKInterpreter(object):
elif function in other_functions:
other_functions[function]()
else:
raise ExtractorError('Unknown funcion %s' % function)
raise ExtractorError('Unknown function %s' % function)
return sdk.target

88
youtube_dlc/extractor/lbry.py

@ -0,0 +1,88 @@
# coding: utf-8
from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
mimetype2ext,
try_get,
)
class LBRYIE(InfoExtractor):
IE_NAME = 'lbry.tv'
_VALID_URL = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/(?P<id>@[0-9a-zA-Z-]+:[0-9a-z]+/[0-9a-zA-Z().-]+:[0-9a-z])'
_TESTS = [{
# Video
'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
'md5': '65bd7ec1f6744ada55da8e4c48a2edf9',
'info_dict': {
'id': '17f983b61f53091fb8ea58a9c56804e4ff8cff4d',
'ext': 'mp4',
'title': 'First day in LBRY? Start HERE!',
'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
'timestamp': 1595694354,
'upload_date': '20200725',
}
}, {
# Audio
'url': 'https://lbry.tv/@LBRYFoundation:0/Episode-1:e',
'md5': 'c94017d3eba9b49ce085a8fad6b98d00',
'info_dict': {
'id': 'e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
'ext': 'mp3',
'title': 'The LBRY Foundation Community Podcast Episode 1 - Introduction, Streaming on LBRY, Transcoding',
'description': 'md5:661ac4f1db09f31728931d7b88807a61',
'timestamp': 1591312601,
'upload_date': '20200604',
}
}, {
'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
'only_matching': True,
}]
def _call_api_proxy(self, method, display_id, params):
return self._download_json(
'https://api.lbry.tv/api/v1/proxy', display_id,
headers={'Content-Type': 'application/json-rpc'},
data=json.dumps({
'method': method,
'params': params,
}).encode())['result']
def _real_extract(self, url):
display_id = self._match_id(url).replace(':', '#')
uri = 'lbry://' + display_id
result = self._call_api_proxy(
'resolve', display_id, {'urls': [uri]})[uri]
result_value = result['value']
if result_value.get('stream_type') not in ('video', 'audio'):
raise ExtractorError('Unsupported URL', expected=True)
streaming_url = self._call_api_proxy(
'get', display_id, {'uri': uri})['streaming_url']
source = result_value.get('source') or {}
media = result_value.get('video') or result_value.get('audio') or {}
signing_channel = result_value.get('signing_channel') or {}
return {
'id': result['claim_id'],
'title': result_value['title'],
'thumbnail': try_get(result_value, lambda x: x['thumbnail']['url'], compat_str),
'description': result_value.get('description'),
'license': result_value.get('license'),
'timestamp': int_or_none(result.get('timestamp')),
'tags': result_value.get('tags'),
'width': int_or_none(media.get('width')),
'height': int_or_none(media.get('height')),
'duration': int_or_none(media.get('duration')),
'channel': signing_channel.get('name'),
'channel_id': signing_channel.get('claim_id'),
'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')),
'filesize': int_or_none(source.get('size')),
'url': streaming_url,
}

91
youtube_dlc/extractor/lrt.py

@ -5,28 +5,26 @@ import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
parse_duration,
remove_end,
clean_html,
merge_dicts,
)
class LRTIE(InfoExtractor):
IE_NAME = 'lrt.lt'
_VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:www\.)?lrt\.lt(?P<path>/mediateka/irasas/(?P<id>[0-9]+))'
_TESTS = [{
# m3u8 download
'url': 'http://www.lrt.lt/mediateka/irasas/54391/',
'md5': 'fe44cf7e4ab3198055f2c598fc175cb0',
'url': 'https://www.lrt.lt/mediateka/irasas/2000127261/greita-ir-gardu-sicilijos-ikvepta-klasikiniu-makaronu-su-baklazanais-vakariene',
'md5': '85cb2bb530f31d91a9c65b479516ade4',
'info_dict': {
'id': '54391',
'id': '2000127261',
'ext': 'mp4',
'title': 'Septynios Kauno dienos',
'description': 'md5:24d84534c7dc76581e59f5689462411a',
'duration': 1783,
'view_count': int,
'like_count': int,
'title': 'Greita ir gardu: Sicilijos įkvėpta klasikinių makaronų su baklažanais vakarienė',
'description': 'md5:ad7d985f51b0dc1489ba2d76d7ed47fa',
'duration': 3035,
'timestamp': 1604079000,
'upload_date': '20201030',
},
}, {
# direct mp3 download
@ -43,52 +41,35 @@ class LRTIE(InfoExtractor):
},
}]
def _extract_js_var(self, webpage, var_name, default):
return self._search_regex(
r'%s\s*=\s*(["\'])((?:(?!\1).)+)\1' % var_name,
webpage, var_name.replace('_', ' '), default, group=2)
def _real_extract(self, url):
video_id = self._match_id(url)
path, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, video_id)