Skip to content

Commit

Permalink
Merge pull request #185 from C0D3D3V/pr_add_extractors
Browse files Browse the repository at this point in the history
implement #179 add extractor for sharepoint, echo360 and update owncloud extractor
  • Loading branch information
C0D3D3V authored Mar 13, 2023
2 parents 6a76f9f + c349e32 commit f584382
Show file tree
Hide file tree
Showing 5 changed files with 334 additions and 113 deletions.
5 changes: 4 additions & 1 deletion moodle_dl/downloader/extractors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@
from yt_dlp.YoutubeDL import YoutubeDL
from yt_dlp.extractor.common import InfoExtractor

from moodle_dl.downloader.extractors.echo360 import Echo360IE # noqa: F401
from moodle_dl.downloader.extractors.googledrive import GoogleDriveIE # noqa: F401
from moodle_dl.downloader.extractors.helixmedia_lti import HelixmediaLtiIE # noqa: F401
from moodle_dl.downloader.extractors.kalvidres_lti import KalvidresLtiIE # noqa: F401
from moodle_dl.downloader.extractors.opencast_lti import OpencastLtiIE # noqa: F401
from moodle_dl.downloader.extractors.owncloud import OwncloudIE # noqa: F401
from moodle_dl.downloader.extractors.owncloud import OwnCloudIE # noqa: F401
from moodle_dl.downloader.extractors.sharepoint import SharePointIE # noqa: F401
from moodle_dl.downloader.extractors.sharepointfiles import SharePointFilesIE # noqa: F401

# Collect every object bound to a module-level name ending in 'IE'; the
# extractor classes imported above follow that naming, which is why those
# imports are kept despite looking unused (hence the `noqa: F401` markers).
ALL_ADDITIONAL_EXTRACTORS = [Class for name, Class in globals().items() if name.endswith('IE')]

Expand Down
135 changes: 135 additions & 0 deletions moodle_dl/downloader/extractors/echo360.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import re

from yt_dlp.extractor.common import InfoExtractor
from yt_dlp.utils import (
determine_ext,
float_or_none,
traverse_obj,
variadic,
)


class Echo360IE(InfoExtractor):
    """Extractor for public Echo360 media links (``/media/<uuid>/public``)."""

    # Hosts accepted by this extractor. There is deliberately no trailing ``|``:
    # an empty alternative would let a URL with an empty host match.
    _INSTANCES_RE = r'''(?:
                        echo360\.ca|
                        echo360\.net\.au|
                        echo360\.org\.au|
                        echo360\.org\.uk|
                        echo360\.org
                    )'''
    _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
    _VALID_URL = rf'''(?x)
        https?://(?P<host>{_INSTANCES_RE})
        /media/(?P<id>{_UUID_RE})/public'''

    # Filled with (host, public/share link id, media id)
    _API_BASE = 'https://%s/api/ui/echoplayer/public-links/%s/media/%s/player-properties'

    _TESTS = [
        {
            'url': 'https://echo360.org.uk/media/1d8392aa-a3e7-4e78-94cf-b6532c27208c/public',
            'info_dict': {
                'id': '3c7ae6e0-fa19-432d-aa21-c283b4276f2a',
                'ext': 'mp4',
                'title': '3-4 Force + moment + mechanics.mp4',
                'duration': 4731.888,
            },
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://echo360.net.au/media/f04960a9-2efc-4b63-87b5-72e629081d15/public',
            'info_dict': {
                'id': '6098a147-2d65-40f3-b9e9-a0204afe450c',
                'ext': 'mp4',
                'title': 'EXSC634_Online_Workshop_Week_4.mp4',
                'duration': 6659.72,
            },
            'params': {'skip_download': 'm3u8'},
        },
    ]

    def _call_api(self, host, video_id, media_id, session_token, **kwargs):
        """Download the player-properties JSON, authenticating with a bearer token.

        ``video_id`` is used both as the link id inside the API URL and as the
        id passed to ``_download_json``. Extra keyword arguments are forwarded
        to ``_download_json``.
        """
        return self._download_json(
            self._API_BASE % (host, video_id, media_id),
            video_id,
            headers={'Authorization': f'Bearer {session_token}'},
            **kwargs,
        )

    @staticmethod
    def _update_url_query(uri, query_string):
        """Replace the query part of ``uri`` with ``query_string``.

        ``uri`` is returned unchanged when ``query_string`` is ``None``.
        """
        if query_string is None:
            return uri
        return f'{uri.split("?", 1)[0]}?{query_string}'

    @staticmethod
    def _get_query_string(uri, query_strings):
        """Return the ``queryString`` of the first entry whose ``uriPattern`` matches.

        The pattern is matched (``re.match``) against ``uri`` without its query
        part. Returns ``None`` when no entry matches.
        """
        uri_base = uri.split('?', 1)[0]
        for query_string in query_strings:
            if re.match(query_string['uriPattern'], uri_base):
                return query_string['queryString']
        return None

    def _parse_mediapackage(self, video):
        """Build the info dict (id, formats, title, duration) from the API ``data`` object."""
        video_id = video['playableAudioVideo']['mediaId']
        query_strings = traverse_obj(video, ('sourceQueryStrings', 'queryStrings')) or []

        formats = []
        for track in variadic(traverse_obj(video, ('playableAudioVideo', 'playableMedias')) or []):
            href = track.get('uri')
            if href is None:
                continue
            href = self._update_url_query(href, self._get_query_string(href, query_strings))
            # Only HLS tracks are turned into formats
            if not (track.get('isHls') or determine_ext(href, None) == 'm3u8'):
                continue

            hls_formats = self._extract_m3u8_formats(
                href, video_id, live=track.get('isLive'), m3u8_id='hls', entry_protocol='m3u8_native', fatal=False
            )
            for hls_format in hls_formats:
                # The variant playlist and its segments need the matching
                # query string appended as well
                query_string = self._get_query_string(hls_format['url'], query_strings)
                hls_format['extra_param_to_segment_url'] = query_string
                hls_format['url'] = self._update_url_query(hls_format['url'], query_string)
            formats.extend(hls_formats)

        # Duration is parsed from a ``PT<seconds>S`` string; the fractional
        # part is optional so single-digit values such as ``PT5S`` match too.
        # A missing duration is searched as '' and simply yields None.
        duration_text = traverse_obj(video, ('playableAudioVideo', 'duration')) or ''

        return {
            'id': video_id,
            'formats': formats,
            'title': video.get('mediaName'),
            'duration': float_or_none(
                self._search_regex(
                    r'PT(\d+(?:\.\d+)?)S',
                    duration_text,
                    'video duration',
                    default=None,
                    fatal=False,
                )
            ),
        }

    def _real_extract(self, url):
        """Read the player config from the page, open a session and query the API."""
        host, video_id = self._match_valid_url(url).group('host', 'id')
        webpage = self._download_webpage(url, video_id)

        # The config is embedded as an escaped JSON string literal, so the
        # escaped quotes are restored before parsing
        player_config = self._search_json(
            r'Echo\["mediaPlayerBootstrapApp"\]\("',
            webpage,
            'player config',
            video_id,
            transform_source=lambda x: x.replace(R'\"', '"'),
        )

        # The response to the session request carries the token (``Token``
        # header) that the API call below sends as bearer token
        urlh = self._request_webpage(
            f'https://{host}/api/ui/sessions/{player_config["sessionId"]}',
            video_id,
            note='Open video session',
            errnote='Unable to open video session',
        )

        return self._parse_mediapackage(
            self._call_api(
                host,
                player_config.get('shareLinkId') or player_config['publicLinkId'],
                player_config['mediaId'],
                urlh.headers['Token'],
            )['data']
        )
174 changes: 62 additions & 112 deletions moodle_dl/downloader/extractors/owncloud.py
Original file line number Diff line number Diff line change
@@ -1,149 +1,99 @@
import re
import urllib.parse

from yt_dlp.extractor.common import InfoExtractor

from yt_dlp.compat import (
compat_urllib_parse,
compat_urllib_parse_urlparse,
compat_urllib_parse_unquote,
)

from yt_dlp.utils import (
determine_ext,
ExtractorError,
int_or_none,
url_or_none,
urlencode_postdata,
HEADRequest,
mimetype2ext,
encode_compat_str,
)
from moodle_dl.utils import determine_ext


class OwnCloudIE(InfoExtractor):
    """Extractor for files shared through ownCloud public links (``/s/<id>``)."""

    IE_NAME = 'owncloud'

    # Hosts accepted by this extractor. There is deliberately no trailing ``|``:
    # an empty alternative would let a URL with an empty host match.
    _INSTANCES_RE = r'''(?:
                        (?:[^\.]+\.)?sciebo\.de|
                        cloud\.uni-koblenz-landau\.de
                    )'''
    _VALID_URL = rf'''(?x)
        (?P<server>https?://{_INSTANCES_RE})/s/
        (?P<id>[\w\-\.]+)
        (?P<extra>/.*)?
        '''

    _TESTS = [
        {
            'url': 'https://ruhr-uni-bochum.sciebo.de/s/wWhqZzh9jTumVFN',
            'info_dict': {
                'id': 'wWhqZzh9jTumVFN',
                'ext': 'mp4',
                'title': 'CmvpJST.mp4',
            },
        },
    ]

    def _real_extract(self, url):
        """Load the share page, unlock it if needed and return the single download format."""
        server, video_id = self._match_valid_url(url).group('server', 'id')

        # Fetch the share landing page itself (not the raw input URL, which may
        # carry an extra path such as /download); the id goes second.
        webpage, urlh = self._download_webpage_handle(f'{server}/s/{video_id}', video_id, 'Downloading webpage')

        if self._search_regex(r'<label[^>]+?for="(password)"', webpage, 'password field', fatal=False, default=None):
            # Password protected
            webpage, urlh = self._verify_video_password(webpage, urlh.geturl(), video_id)

        hidden_inputs = self._hidden_inputs(webpage)
        title = hidden_inputs.get('filename')

        return {
            'id': video_id,
            'title': title,
            'formats': [
                {
                    # Fall back to "<final page URL>/download" when the page
                    # does not provide a downloadURL input
                    'url': url_or_none(hidden_inputs.get('downloadURL') or self._extend_to_download_url(urlh.geturl())),
                    'ext': determine_ext(title),
                    'filesize': int_or_none(hidden_inputs.get('filesize')),
                }
            ],
        }

    def _extend_to_download_url(self, url: str) -> str:
        """
        Adds the string /download to a URL
        @param url: The URL where the string should be added.
        @return: The URL with the string.
        """

        # Adds /download to the end of the URL path
        url_parts = list(urllib.parse.urlparse(url))
        url_parts[2] = url_parts[2].rstrip('/') + '/download'
        return urllib.parse.urlunparse(url_parts)

    def _verify_video_password(self, webpage, url, video_id):
        """
        Submits the configured video password to a protected share page
        @param webpage: The password form page; its requesttoken is sent along.
        @param url: The URL the password form is posted to.
        @param video_id: The id used for the request.
        @return: The page returned after validation and its URL handle.
        Raises ExtractorError if no password is configured or it is rejected.
        """
        password = self._downloader.params.get('videopassword')
        if password is None:
            raise ExtractorError(
                'This video is protected by a password, use the --video-password option', expected=True
            )

        data = urlencode_postdata(
            {
                'requesttoken': self._search_regex(
                    r'<input[^>]+?name="requesttoken" value="([^\"]+)"', webpage, 'requesttoken'
                ),
                'password': password,
            }
        )

        validation_response, urlh = self._download_webpage_handle(
            url, video_id, note='Validating Password...', errnote='Wrong password?', data=data
        )

        if self._search_regex(
            r'<label[^>]+?for="(password)"', validation_response, 'password field', fatal=False, default=None
        ):
            # Still password protected
            warning = self._search_regex(
                r'<div[^>]+?class="warning">([^<]*)</div>',
                validation_response,
                'warning',
                fatal=False,
                default="The password is wrong. Try again.",
            )
            raise ExtractorError(f'Login failed, {self.IE_NAME} said: {warning!r}', expected=True)
        return validation_response, urlh
Loading

0 comments on commit f584382

Please sign in to comment.