-
-
Notifications
You must be signed in to change notification settings - Fork 62
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #185 from C0D3D3V/pr_add_extractors
implement #179 add extractor for sharepoint, echo360 and update owncloud extractor
- Loading branch information
Showing
5 changed files
with
334 additions
and
113 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
import re | ||
|
||
from yt_dlp.extractor.common import InfoExtractor | ||
from yt_dlp.utils import ( | ||
determine_ext, | ||
float_or_none, | ||
traverse_obj, | ||
variadic, | ||
) | ||
|
||
|
||
class Echo360IE(InfoExtractor):
    """Extractor for publicly shared Echo360 media (``/media/<uuid>/public`` links).

    The public page embeds a player bootstrap config; a session is opened with
    the ``sessionId`` found there, and the token returned in the session
    response headers authorizes the player-properties API call that lists the
    playable (HLS) tracks.
    """

    # Hosts accepted by this extractor. There is deliberately no trailing ``|``
    # after the last entry: it would add an empty alternative and let the
    # ``host`` group of _VALID_URL match an empty string.
    _INSTANCES_RE = r'''(?:
                        echo360\.ca|
                        echo360\.net\.au|
                        echo360\.org\.au|
                        echo360\.org\.uk|
                        echo360\.org
                    )'''
    _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
    _VALID_URL = rf'''(?x)
                    https?://(?P<host>{_INSTANCES_RE})
                    /media/(?P<id>{_UUID_RE})/public'''

    # Filled with (host, public/share link id, media id)
    _API_BASE = 'https://%s/api/ui/echoplayer/public-links/%s/media/%s/player-properties'

    _TESTS = [
        {
            'url': 'https://echo360.org.uk/media/1d8392aa-a3e7-4e78-94cf-b6532c27208c/public',
            'info_dict': {
                'id': '3c7ae6e0-fa19-432d-aa21-c283b4276f2a',
                'ext': 'mp4',
                'title': '3-4 Force + moment + mechanics.mp4',
                'duration': 4731.888,
            },
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://echo360.net.au/media/f04960a9-2efc-4b63-87b5-72e629081d15/public',
            'info_dict': {
                'id': '6098a147-2d65-40f3-b9e9-a0204afe450c',
                'ext': 'mp4',
                'title': 'EXSC634_Online_Workshop_Week_4.mp4',
                'duration': 6659.72,
            },
            'params': {'skip_download': 'm3u8'},
        },
    ]

    def _call_api(self, host, video_id, media_id, session_token, **kwargs):
        """Download the player-properties JSON, authorized with the session token.

        Extra keyword arguments are forwarded to ``_download_json``.
        """
        return self._download_json(
            self._API_BASE % (host, video_id, media_id),
            video_id,
            headers={'Authorization': f'Bearer {session_token}'},
            **kwargs,
        )

    @staticmethod
    def _update_url_query(uri, query_string):
        """Replace the query part of ``uri`` with ``query_string``.

        ``uri`` is returned unchanged when ``query_string`` is None.
        """
        if query_string is not None:
            return f'{uri.split("?", 1)[0]}?{query_string}'
        return uri

    @staticmethod
    def _get_query_string(uri, query_strings):
        """Return the ``queryString`` of the first entry whose ``uriPattern``
        matches ``uri`` (query part stripped), or None when no entry matches.
        """
        uri_base = uri.split("?", 1)[0]
        for query_string in query_strings:
            if re.match(query_string['uriPattern'], uri_base):
                return query_string['queryString']
        return None

    def _parse_mediapackage(self, video):
        """Build the info dict from the ``data`` object of the player-properties response.

        Only HLS tracks are turned into formats; tracks without a ``uri`` are
        skipped.
        """
        video_id = video['playableAudioVideo']['mediaId']
        query_strings = traverse_obj(video, ('sourceQueryStrings', 'queryStrings')) or []

        formats = []
        for track in variadic(traverse_obj(video, ('playableAudioVideo', 'playableMedias')) or []):
            href = track.get('uri')
            if href is None:
                continue
            href = self._update_url_query(href, self._get_query_string(href, query_strings))
            if track.get('isHls') or determine_ext(href, None) == 'm3u8':
                hls_formats = self._extract_m3u8_formats(
                    href, video_id, live=track.get('isLive'), m3u8_id='hls', entry_protocol='m3u8_native', fatal=False
                )

                # Each variant playlist (and its segments) needs the query
                # string that belongs to its own URL pattern.
                for hls_format in hls_formats:
                    query_string = self._get_query_string(hls_format['url'], query_strings)
                    hls_format['extra_param_to_segment_url'] = query_string
                    hls_format['url'] = self._update_url_query(hls_format['url'], query_string)

                formats.extend(hls_formats)

        return {
            'id': video_id,
            'formats': formats,
            'title': video.get('mediaName'),
            'duration': float_or_none(
                self._search_regex(
                    # Accepts integer and fractional seconds; the previous
                    # pattern required at least two digits and missed
                    # values such as "PT5S".
                    r'PT(\d+(?:\.\d+)?)S',
                    traverse_obj(video, ('playableAudioVideo', 'duration')) or '',
                    'video duration',
                    default=None,
                )
            ),
        }

    def _real_extract(self, url):
        host, video_id = self._match_valid_url(url).group('host', 'id')
        webpage = self._download_webpage(url, video_id)

        # The config is passed to the bootstrap call as an escaped JSON
        # string, so the quotes have to be unescaped before parsing.
        player_config = self._search_json(
            r'Echo\["mediaPlayerBootstrapApp"\]\("',
            webpage,
            'player config',
            video_id,
            transform_source=lambda x: x.replace(R'\"', '"'),
        )

        # Opening the session yields the bearer token in the response headers.
        urlh = self._request_webpage(
            f'https://{host}/api/ui/sessions/{player_config["sessionId"]}',
            video_id,
            note='Open video session',
            errnote='Unable to open video session',
        )

        return self._parse_mediapackage(
            self._call_api(
                host,
                player_config.get('shareLinkId') or player_config['publicLinkId'],
                player_config['mediaId'],
                urlh.headers['Token'],
            )['data']
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,149 +1,99 @@ | ||
import re | ||
import urllib.parse | ||
|
||
from yt_dlp.extractor.common import InfoExtractor | ||
|
||
from yt_dlp.compat import ( | ||
compat_urllib_parse, | ||
compat_urllib_parse_urlparse, | ||
compat_urllib_parse_unquote, | ||
) | ||
|
||
from yt_dlp.utils import ( | ||
determine_ext, | ||
ExtractorError, | ||
int_or_none, | ||
url_or_none, | ||
urlencode_postdata, | ||
HEADRequest, | ||
mimetype2ext, | ||
encode_compat_str, | ||
) | ||
from moodle_dl.utils import determine_ext | ||
|
||
|
||
class OwnCloudIE(InfoExtractor):
    """Extractor for files shared through ownCloud public links (``<server>/s/<id>``).

    The share's landing page carries the file name, size and download URL in
    hidden form inputs. Password-protected shares are unlocked with the
    ``videopassword`` parameter first.
    """

    IE_NAME = 'owncloud'

    # Hosts accepted by this extractor. There is deliberately no trailing ``|``
    # after the last entry: it would add an empty alternative and let
    # _VALID_URL match a URL without any host.
    _INSTANCES_RE = r'''(?:
                        (?:[^\.]+\.)?sciebo\.de|
                        cloud\.uni-koblenz-landau\.de
                    )'''
    _VALID_URL = rf'''(?x)
                    (?P<server>https?://{_INSTANCES_RE})/s/
                    (?P<id>[\w\-\.]+)
                    (?P<extra>/.*)?
                    '''

    _TESTS = [
        {
            'url': 'https://ruhr-uni-bochum.sciebo.de/s/wWhqZzh9jTumVFN',
            'info_dict': {
                'id': 'wWhqZzh9jTumVFN',
                'ext': 'mp4',
                'title': 'CmvpJST.mp4',
            },
        },
    ]

    def _real_extract(self, url):
        server, video_id = self._match_valid_url(url).group('server', 'id')

        # Fetch the share's landing page rather than the URL as given: the
        # input may carry an extra path (the ``extra`` group) that does not
        # lead to the page holding the hidden inputs.
        webpage, urlh = self._download_webpage_handle(f'{server}/s/{video_id}', video_id, 'Downloading webpage')

        if self._search_regex(r'<label[^>]+?for="(password)"', webpage, 'password field', fatal=False, default=None):
            # Password protected
            webpage, urlh = self._verify_video_password(webpage, urlh.geturl(), video_id)

        hidden_inputs = self._hidden_inputs(webpage)
        title = hidden_inputs.get('filename')

        return {
            'id': video_id,
            'title': title,
            'formats': [
                {
                    # Fall back to "<final landing URL>/download" when the page
                    # does not advertise a download URL.
                    'url': url_or_none(hidden_inputs.get('downloadURL') or self._extend_to_download_url(urlh.geturl())),
                    'ext': determine_ext(title),
                    'filesize': int_or_none(hidden_inputs.get('filesize')),
                }
            ],
        }

    def _extend_to_download_url(self, url: str) -> str:
        """
        Adds the string /download to a URL
        @param url: The URL where the string should be added.
        @return: The URL with the string.
        """

        # Adds /download to the end of the URL path
        url_parts = list(urllib.parse.urlparse(url))
        url_parts[2] = url_parts[2].rstrip('/') + '/download'
        return urllib.parse.urlunparse(url_parts)

    def _verify_video_password(self, webpage, url, video_id):
        """Submit the share password and return ``(unlocked page, url handle)``.

        Raises ExtractorError (expected) when no password was supplied or when
        the server still shows the password form after submission.
        """
        password = self.get_param('videopassword')
        if password is None:
            raise ExtractorError(
                'This video is protected by a password, use the --video-password option', expected=True
            )

        data = urlencode_postdata(
            {
                'requesttoken': self._search_regex(
                    r'<input[^>]+?name="requesttoken" value="([^\"]+)"', webpage, 'requesttoken'
                ),
                'password': password,
            }
        )

        validation_response, urlh = self._download_webpage_handle(
            url, video_id, note='Validating Password...', errnote='Wrong password?', data=data
        )

        if self._search_regex(
            r'<label[^>]+?for="(password)"', validation_response, 'password field', fatal=False, default=None
        ):
            # Still password protected
            warning = self._search_regex(
                r'<div[^>]+?class="warning">([^<]*)</div>',
                validation_response,
                'warning',
                fatal=False,
                default="The password is wrong. Try again.",
            )
            raise ExtractorError(f'Login failed, {self.IE_NAME} said: {warning!r}', expected=True)
        return validation_response, urlh
Oops, something went wrong.