From 265f73bfd37235e25841ee36cd31c0a0c340f7ef Mon Sep 17 00:00:00 2001 From: Miguel Date: Sun, 29 Nov 2020 00:25:17 +0000 Subject: [PATCH 1/4] Update --- .gitignore | 1 - readme.md | 8 + scraper.conf | 6 + scraper.conf.sample | 5 - scraper.py | 399 ++++++++++++++++++++++++++------------------ 5 files changed, 249 insertions(+), 170 deletions(-) delete mode 100644 .gitignore create mode 100644 scraper.conf delete mode 100644 scraper.conf.sample diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 8bb84b2..0000000 --- a/.gitignore +++ /dev/null @@ -1 +0,0 @@ -scraper.conf diff --git a/readme.md b/readme.md index de095b3..161c098 100644 --- a/readme.md +++ b/readme.md @@ -55,6 +55,14 @@ python scraper.py Follow the Dialog. +Updates Made +---------- +- Fix to work with new versions of Moodle +- Fix to work with Python3 +- Option to filter only some courses +- Remove semester scrapping option +- Added options to input the username, password and courses filter directly from the command prompt + Disclaimer ---------- diff --git a/scraper.conf b/scraper.conf new file mode 100644 index 0000000..0cb0e32 --- /dev/null +++ b/scraper.conf @@ -0,0 +1,6 @@ +[scraper] +baseurl = https://elearning.novaims.unl.pt/moodle/ +user = user +pwd = password +root = + diff --git a/scraper.conf.sample b/scraper.conf.sample deleted file mode 100644 index 934ec08..0000000 --- a/scraper.conf.sample +++ /dev/null @@ -1,5 +0,0 @@ -[scraper] -user = username -pwd = password -root = /path/to/your/desired/location/ -baseurl = https://moodle.school.tld/ diff --git a/scraper.py b/scraper.py index cb4a99d..33eda58 100755 --- a/scraper.py +++ b/scraper.py @@ -1,28 +1,29 @@ #!/usr/bin/env python2 from requests import session from bs4 import BeautifulSoup -import os, sys, itertools, re -import urllib -import ConfigParser +import os, sys +import configparser as ConfigParser import datetime +import json +import urllib + + +ERROR_LOGIN = 'Error on Login' +ERROR_LINK = 'Link provided is wrong' # 
read config config = ConfigParser.RawConfigParser() config.read('scraper.conf') +baseurl = 0 +root='' -username = config.get("scraper", "user"); -password = config.get("scraper", "pwd"); -root = config.get("scraper", "root"); -baseurl = config.get("scraper", "baseurl"); - -sections = itertools.count() -files = itertools.count() class colors: HEADER = '\033[95m' OKBLUE = '\033[94m' OKGREEN = '\033[92m' WARNING = '\033[93m' + DANGER = '\033[31m' FAIL = '\033[91m' ENDC = '\033[0m' BOLD = '\033[1m' @@ -35,76 +36,68 @@ def login(user, pwd): } with session() as ses: r = ses.post(baseurl + 'login/index.php', data=authdata) - return ses + + if r.status_code != 200: + raise Exception(ERROR_LINK) + + soup = BeautifulSoup(r.text,'html.parser') + + user = soup.select_one('span[class*="userbutton"]') + if(user!=None): + return ses + else: + raise Exception(ERROR_LOGIN) + + +def exit_and_save(): + with open('scraper.conf', 'w') as configfile: + config.write(configfile) + sys.exit() + +#Return a dictionary with the courses and respective links +def getCurrCourses(ses): + page = ses.get(baseurl + 'index.php') + soup = BeautifulSoup(page.text, 'html.parser') + curr_courses = soup.select('div[class*="courses-view-course-item"]') + result = dict() + for course in curr_courses: + tag_h = course.find('h4') + course_link = tag_h.a['href'] + course_name = tag_h.a.text.replace(' ','_').replace('/', '-') + result[course_name] = course_link + + return result + + +#Return a list of toupples containing the name and link of resource respectively +def getResources(ses, course_link): + page = ses.get(course_link) + soup = BeautifulSoup(page.text, 'html.parser') + course_div = soup.select('div[class*="course-content"]')[0] + resources = course_div.select('li[class*="activity resource"] div[class*="activityinstance"] a') + resource_list = [] + for resource in resources: + link = resource['href'] + name = resource.span.text + resource_list.append((name,link)) + return resource_list -def 
getSemesters(ses): - r = ses.get(baseurl + 'index.php') - if(r.status_code == 200): - soup = BeautifulSoup(r.text, 'html.parser') - semesters = dict() - temp = soup.find(id='cmb_mc_semester') - - for o in soup.find(id='cmb_mc_semester'): - if o != unicode('\n'): - if o.string != 'Alle Semester': - semesters[o['value']] = o.string - return semesters - else: - print 'ERROR: ' + str(r.status) + ' ' + r.reason - sys.exit() - - -def getInfo(tag): - c = dict() - c['url'] = tag['href'] - p = unicode(tag.string).split(',') - if len(p) >= 3: - q = p[0].split('.') - c['course'] = q[0].strip() - c['sem'] = q[1] - c['key'] = q[2].strip() - c['name'] = p[1].strip() - c['type'] = p[2].strip().replace(' ', '-') - elif len(p) == 1: - c['course'] = p[0].strip() - c['sem'] = 'X' - c['key'] = p[0].strip() - c['name'] = p[0].strip() - c['type'] = 'Allgemein' - return c - - -def getCoursesForSem(session, s): - r = session.get(baseurl + 'index.php?role=0&cat=1&csem=0&sem=' + s) - if(r.status_code == 200): - soup = BeautifulSoup(r.text, 'html.parser') - courses = list() - for o in soup.find_all('h3'): - if (len(o.find_all('a')) > 0): - c = getInfo(o.contents[0]) - courses.append(c) - return courses - else: - print 'ERROR: ' + str(r.status) + ' ' + r.reason - sys.exit() def saveFile(session, src, path, name): - global files - files.next() - dst = path + name.decode('utf-8') + dst = path + name dst = dst.replace(':', '-').replace('"', '') try: with open(dst): - print '['+colors.OKBLUE+'skip'+colors.ENDC+'] | | +--%s' %name + print('['+colors.OKBLUE+'skip'+colors.ENDC+'] | | +--%s' %name) pass except IOError: with open(dst, 'wb') as handle: - print '['+colors.OKGREEN+'save'+colors.ENDC+'] | | +--%s' %name + print('['+colors.OKGREEN+'save'+colors.ENDC+'] | | +--%s' %name) r = session.get(src, stream=True) for block in r.iter_content(1024): if not block: @@ -113,18 +106,18 @@ def saveFile(session, src, path, name): def saveLink(session, url, path, name): - global files - files.next() - fname 
= name.encode('utf-8').replace('/', '') + '.html' - dst = path.encode('utf-8') + fname + #global files + #files.next() + fname = name.replace('/', '') + '.html' + dst = path + fname dst = dst.replace(':', '-').replace('"', '') try: with open(dst): - print '['+colors.OKBLUE+'skip'+colors.ENDC+'] | | +--%s' %name + print('['+colors.OKBLUE+'skip'+colors.ENDC+'] | | +--%s' %name) pass except IOError: with open(dst, 'wb') as handle: - print '['+colors.OKGREEN+'save'+colors.ENDC+'] | | +--%s' %name + print('['+colors.OKGREEN+'save'+colors.ENDC+'] | | +--%s' %name) r = session.get(url) soup = BeautifulSoup(r.text, 'html.parser') link = soup.find(class_='region-content').a['href'] @@ -132,24 +125,24 @@ def saveLink(session, url, path, name): handle.write(u'' + name.decode('utf-8') + u'') except UnicodeEncodeError: os.remove(dst) - print '['+colors.FAIL+'fail'+colors.ENDC+'] | | +--%s' %name + print('['+colors.FAIL+'fail'+colors.ENDC+'] | | +--%s' %name) pass def saveInfo(path, info, tab): if "Foren" not in info: - global files - files.next() + #global files + #files.next() name = u'info.txt' dst = path + name dst = dst.replace(':', '-').replace('"', '') try: with open(dst): - print '['+colors.OKBLUE+'skip'+colors.ENDC+'] ' + tab + '+--%s' %name + print('['+colors.OKBLUE+'skip'+colors.ENDC+'] ',tab,'+--%s' %name) pass except IOError: with open(dst, 'wb') as handle: - print '['+colors.OKGREEN+'save'+colors.ENDC+'] ' + tab + '+--%s' %name + print('['+colors.OKGREEN+'save'+colors.ENDC+'] ',tab,'+--%s' %name) handle.write(info.encode('utf-8')) @@ -163,7 +156,7 @@ def downloadResource(session, res, path): headers = r.headers.keys() if ('content-disposition' in headers): #got a direct file link - name = r.headers['content-disposition'].decode('utf-8').split(';')[1].split('=')[1].strip('"') + name = r.headers['content-disposition'].split(';')[1].split('=')[1].strip('"') else: #got a preview page soup = BeautifulSoup(r.text, 'html.parser') @@ -174,16 +167,15 @@ def 
downloadResource(session, res, path): #it's obviously an ugly frameset site src = soup.find_all('frame')[1]['src'] name = os.path.basename(src) - name = urllib.url2pathname(name.encode('utf-8')) + name = urllib.request.url2pathname(name) saveFile(session, src, path, name) else: - print 'ERROR: ' + str(r.status) + ' ' + r.reason - sys.exit() + print(('ERROR: ',str(r.status),' ',r.reason)) + #sys.exit() def downloadSection(session, s, path): - #print "download Section" - global sections + #print("download Section") if s['id'] == 'section-0': try: info = s.find(class_='activity label modtype_label ').get_text() @@ -192,7 +184,7 @@ def downloadSection(session, s, path): else: saveInfo(path, info, u'') - res = s.find_all(class_='activity resource modtype_resource ') + res = s.select('[class*="activity resource modtype_resource"]') for r in res: downloadResource(session, r, path) folders = s.find_all(class_='box generalbox foldertree') @@ -201,18 +193,16 @@ def downloadSection(session, s, path): res = f.find_all(class_='fp-filename-icon') label = res.pop(0).text path = root + u'/' + label.replace('/', '-') - path = urllib.url2pathname(path.encode('utf-8')).replace(':', '-').replace('"', '') + path = path.encode('utf-8').replace(':', '-').replace('"', '').replace(' ','_') if not os.path.exists(path): os.makedirs(path) - print ' | +--' + colors.BOLD + label + colors.ENDC + print(' | +--',colors.BOLD,label,colors.ENDC) for r in res: downloadResource(session, r, path + u'/') else: - sections.next() - s = list(s.children)[2] + #s = list(s.children)[2] name = s.find(class_='sectionname').contents[0].replace('/', '-').strip().strip(':') + '/' - info = '' info = s.find(class_='summary').get_text().strip() if len(info) > 0: if 'Thema' in name: @@ -233,12 +223,12 @@ def downloadSection(session, s, path): path = path.replace(':', '-').replace('"', '') if not os.path.exists(path): os.makedirs(path) - print ' | +--' + colors.BOLD + name + colors.ENDC + print(' | 
+--',colors.BOLD,name,colors.ENDC) if len(info) > 0: saveInfo(path, info, u'| ') - res = s.find_all(class_='activity resource modtype_resource ') + res = s.select('[class*="activity resource modtype_resource"]') for r in res: downloadResource(session, r, path) """ @@ -254,100 +244,181 @@ def downloadSection(session, s, path): os.rmdir(path) -def downloadCourse(session, c, sem): - global files - global sections - files = itertools.count() - sections = itertools.count() - name = c['key'].replace('/', '-') + u'/' - path = root + sem.replace('/', '-') + u'/' + name - path = urllib.url2pathname(path.encode('utf-8')).replace(':', '-').replace('"', '') +def check_courses_selected(courses): + if config.has_option('scraper','curses'): + curses_list = json.loads(config.get("scraper","curses")) + if(len(curses_list)>0): + return curses_list + else: + curses_list = [] + courses_to_download = enumerate(courses.keys()) + for val,name in courses_to_download: + print(val,'-',name) + + print('\nType the number of the courses that you want to save for future searches.') + print('Type one at a time and press enter') + print('If you want all courses type "a", to finish selection type "q"') + + _input = input() + while _input != 'q': + if _input == 'a': + curses_list = list(courses.keys()) + break + try: + input_int = int(_input) + if(input_int < len(courses)): + for num,name in courses_to_download: + if num == input_int: + curses_list.append(name) + break + except: + None + + _input = input() + + print(curses_list) + str_courses = json.dumps(curses_list) + config.set('scraper','curses',str_courses) + + return curses_list + + +def check_auth_info(): + global baseurl + global root + if config.has_option('scraper','user'): + username = config.get("scraper", "user") + + else: + print('Please type your username below.') + username = input() + + if config.has_option('scraper','pwd'): + pwd = config.get("scraper", "pwd") + + else: + print('Please type your password below.',colors.DANGER,'THE 
PASSWORD WILL BE SAVED AS PLAIN TEXT',colors.ENDC) + pwd = input() + + if config.has_option('scraper','root'): + root = config.get("scraper", "root") + else: + root = '' + + if config.has_option('scraper','baseurl'): + baseurl = config.get("scraper", "baseurl") + + else: + print('Please the type the url below.') + baseurl = input() + + session = 0 + try: + session = login(username, pwd) + except Exception as e: + if e.__str__() == ERROR_LINK: + config.set('scraper','user',username) + config.set('scraper','pwd',pwd) + print(colors.DANGER,'Error connecting to website, either the website is unresponsive or the link provided is wrong.',colors.ENDC) + print(colors.WARNING,'BaseURL:' + baseurl,colors.ENDC) + print('To change the link, change the property "baseurl" in scraper.conf.') + exit_and_save() + elif e.__str__() == ERROR_LOGIN: + config.remove_option('scraper','user') + config.remove_option('scraper','pwd') + config.set('scraper','baseurl',baseurl) + print(colors.DANGER,'Error on authentication. 
Either the password or login are wrong.',colors.ENDC) + return check_auth_info() + else: + print(colors.DANGER,'Uknown Error ocurred while loginng in, exiting...') + sys.exit() + + config.set('scraper','user',username) + config.set('scraper','pwd',pwd) + config.set('scraper','root',root) + config.set('scraper','baseurl',baseurl) + + return session + + + + + + +def downloadCourse(session, name_course,link_course): + name = name_course.replace('/', '-') + u'/' + path = root + name + path = path.replace(':', '-').replace('"', '').replace(' ','_') if not os.path.exists(path): os.makedirs(path) - print ' +--' + colors.BOLD + name + colors.ENDC - r = session.get(c['url']) + print(' +--',colors.BOLD,name,colors.ENDC) + r = session.get(link_course) if(r.status_code == 200): soup = BeautifulSoup(r.text, 'html.parser') if not os.path.exists(path + '.dump'): os.makedirs(path + '.dump') - dst = path + '.dump/' + c['key'].replace('/', '-').encode('utf-8') + '-' + c['type'] + '-' + str(datetime.date.today()) + '-full.html' + dst = path + '.dump/' + name_course.replace('/', '-') + '-' + str(datetime.date.today()) + '-full.html' dst = dst.replace(':', '-').replace('"', '') with open(dst, 'wb') as f: f.write(soup.encode('utf-8')) for s in soup.find_all(class_='section main clearfix'): downloadSection(session, s, path) - #print 'Saved ' + str(files.next()) + ' Files in ' + str(sections.next()) + ' Sections' + #print('Saved ',str(files.next()),' Files in ',str(sections.next()),' Sections') else: - print 'ERROR: ' + str(r.status) + ' ' + r.reason - sys.exit() + print('ERROR: ',str(r.status),' ',r.reason) + exit_and_save() -print colors.HEADER -print " _____ .___.__ " -print " / \ ____ ____ __| _/| | ____ " -print " / \ / \ / _ \ / _ \ / __ | | | _/ __ \ " -print " / Y ( <_> | <_> ) /_/ | | |_\ ___/ " -print " \____|__ /\____/ \____/\____ | |____/\___ > " -print " \/ \/ \/ " -print " _________ " -print " / _____/ ________________ ______ ___________ " -print " \_____ \_/ ___\_ __ \__ \ 
\____ \_/ __ \_ __ \ " -print " / \ \___| | \// __ \| |_> > ___/| | \/ " -print "/_______ /\___ >__| (____ / __/ \___ >__| " -print " \/ \/ \/|__| \/ " -print colors.ENDC +print(colors.HEADER) +print(" _____ .___.__ ") +print(" / \ ____ ____ __| _/| | ____ ") +print(" / \ / \ / _ \ / _ \ / __ | | | _/ __ \ ") +print(" / Y ( <_> | <_> ) /_/ | | |_\ ___/ ") +print(" \____|__ /\____/ \____/\____ | |____/\___ > ") +print(" \/ \/ \/ ") +print(" _________ ") +print(" / _____/ ________________ ______ ___________ ") +print(" \_____ \_/ ___\_ __ \__ \ \____ \_/ __ \_ __ \ ") +print(" / \ \___| | \// __ \| |_> > ___/| | \/ ") +print("/_______ /\___ >__| (____ / __/ \___ >__| ") +print(" \/ \/ \/|__| \/ ") +print(colors.ENDC) #logging in -print "logging in..." -session = login(username, password) - -#get semesters -print "getting Semesters..." -sems = getSemesters(session) -if not sems: - print colors.FAIL + 'No semester found - Quitting!' + colors.ENDC - sys.exit() -else: - print colors.WARNING + 'Available semester:' + colors.ENDC - for s in sorted(sems): - print '[' + s + ']: ' + sems[s] - -#input loop -ok = False -while not ok: - s = raw_input(colors.WARNING + 'Select semester: ' + colors.ENDC) - ok = s in sems.keys() - -#get courses -print "getting Courses..." -courses = getCoursesForSem(session, s) -if not courses: - print colors.FAIL + 'No courses in this semester - Quitting!' + colors.ENDC - sys.exit() +print("logging in...") +session = check_auth_info() +#exit_and_save() + + + +print("getting Courses...") +courses = getCurrCourses(session) +if len(courses) == 0: + print(colors.FAIL,'No courses found - Quitting!',colors.ENDC) + exit_and_save() else: - print colors.WARNING + 'Available courses:' + colors.ENDC - for c in courses: - print '[' + str(courses.index(c)) + ']: ' + c['key'] + '.' 
+ str(c['sem']) + ': ' + c['name'] + ' (' + c['type'] + ')' - -#confirmation -c = raw_input(colors.WARNING + 'Choose number of course to download, (a) for all or (q) to quit: ' + colors.ENDC) -if c == 'a': - for f in courses: - try: - downloadCourse(session, f, sems[s]) - print colors.WARNING + 'Successfully processed ' + str(files.next()) + ' Files in ' + str(sections.next()) + ' Sections!' + colors.ENDC - except: - print "Error while processing!" - quit() + print(colors.WARNING,'Available Courses:',colors.ENDC) + for name in courses.keys(): + print('[',name,']: ',courses[name]) + + +curses_list = check_courses_selected(courses) + -if c == 'q': - print colors.FAIL + 'Oh no? - Quitting!' + colors.ENDC - quit() -downloadCourse(session, courses.pop(int(c)), sems[s]) -print colors.WARNING + 'Successfully processed ' + str(files.next()) + ' Files in ' + str(sections.next()) + ' Sections!' + colors.ENDC + +for name,link in courses.items(): + if name not in curses_list: + continue + try: + downloadCourse(session, name, link) + except Exception as e: + print(colors.DANGER,"Error while processing!",colors.ENDC) + raise e + +exit_and_save() From 72b289dab39d47378ebeb990ad476b01ae854202 Mon Sep 17 00:00:00 2001 From: Miguel Date: Tue, 29 Dec 2020 18:49:58 +0000 Subject: [PATCH 2/4] Update readme.md --- readme.md | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/readme.md b/readme.md index 161c098..3b91b36 100644 --- a/readme.md +++ b/readme.md @@ -57,13 +57,17 @@ Follow the Dialog. Updates Made ---------- -- Fix to work with new versions of Moodle -- Fix to work with Python3 -- Option to filter only some courses -- Remove semester scrapping option -- Added options to input the username, password and courses filter directly from the command prompt +- Fix to work with new versions of Moodle. +- Fix to work with Python3. +- Option to filter only some courses. +- Remove semester scrapping option. 
+- Added options to input the username, password and courses filter directly from the command prompt. +Next Updates +---------- +- Download files from folders. + Disclaimer ---------- From 116da7d15a0de8ebff4c1150df88abc792d4d1fb Mon Sep 17 00:00:00 2001 From: Miguel Date: Wed, 2 Jun 2021 03:43:41 +0100 Subject: [PATCH 3/4] Update scraper.py --- scraper.py | 93 ++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 73 insertions(+), 20 deletions(-) diff --git a/scraper.py b/scraper.py index 33eda58..c64c76e 100755 --- a/scraper.py +++ b/scraper.py @@ -6,6 +6,7 @@ import datetime import json import urllib +import zipfile ERROR_LOGIN = 'Error on Login' @@ -173,6 +174,48 @@ def downloadResource(session, res, path): print(('ERROR: ',str(r.status),' ',r.reason)) #sys.exit() +def downloadExtract(session, res, path): + try: + src = res.a['href'] + except TypeError: + return + page_download = session.get(src) + if(page_download.status_code == 200): + soup = BeautifulSoup(page_download.text, 'html.parser') + button_div= soup.find(class_='box generalbox folderbuttons py-3') + id_input = button_div.find('input', {'name': 'id'}).get('value') + session_input = button_div.find('input', {'name': 'sesskey'}).get('value') + form_link = button_div.find('form').get('action') + args = {'id':id_input,'sesskey':session_input} + #print(id_input,session_input,form_link) + + resource = session.post(form_link,data=args) + final_path = path+"the.zip" + with open(final_path, 'wb') as handle: + handle.write(resource.content) + + zip_ref = zipfile.ZipFile(final_path) + zip_ref.extractall(path) + zip_ref.close() + os.remove(final_path) + #saveFile(session, resource.content, path, "the.zip") + else: + print(('ERROR: ',str(page_download.status),' ',page_download.reason)) + +#def downloadFolder(session, res, path): +# try: +# src = res.a['href'] +# except TypeError: +# return +# page_download = session.get(src) +# if(page_download.status_code == 200): +# soup = 
BeautifulSoup(page_download.text, 'html.parser') +# folders = soup.find_all(class_='box generalbox foldertree py-3') +# print(len(folders)) +# for f in folders: +# downloadResource(session, f, path) +# else: +# print(('ERROR: ',str(page_download.status),' ',page_download.reason)) def downloadSection(session, s, path): #print("download Section") @@ -183,22 +226,22 @@ def downloadSection(session, s, path): pass else: saveInfo(path, info, u'') - + res = s.select('[class*="activity resource modtype_resource"]') for r in res: downloadResource(session, r, path) - folders = s.find_all(class_='box generalbox foldertree') + + folders = s.find_all(class_='activity folder modtype_folder') root = path for f in folders: - res = f.find_all(class_='fp-filename-icon') - label = res.pop(0).text - path = root + u'/' + label.replace('/', '-') - path = path.encode('utf-8').replace(':', '-').replace('"', '').replace(' ','_') + res = f.find_all(class_='instancename') + label = str(res.pop(0).contents[0])#.text + path = root + '/' + label.replace('/', '-') + path = path.replace(':', '-').replace('"', '').replace(' ','_') if not os.path.exists(path): os.makedirs(path) print(' | +--',colors.BOLD,label,colors.ENDC) - for r in res: - downloadResource(session, r, path + u'/') + downloadExtract(session, f, path + '/') else: #s = list(s.children)[2] @@ -239,6 +282,17 @@ def downloadSection(session, s, path): saveLink(session, l.a['href'], path, ln.get_text()) """ + folders = s.find_all(class_='activity folder modtype_folder') + root = path[:-1] + for f in folders: + res = f.find_all(class_='instancename') + label = str(res.pop(0).contents[0])#.text + path_ = root + label.replace('/', '-').replace(':', '-').replace('"', '').replace(' ','_') + if not os.path.exists(path_): + os.makedirs(path_) + print(' | +--',colors.BOLD,label,colors.ENDC) + downloadExtract(session, f, path_ + '/') + #remove empty folders if os.listdir(path) == []: os.rmdir(path) @@ -267,7 +321,8 @@ def 
check_courses_selected(courses): try: input_int = int(_input) if(input_int < len(courses)): - for num,name in courses_to_download: + for num,name in enumerate(courses.keys()): + #print(name,num) if num == input_int: curses_list.append(name) break @@ -292,6 +347,7 @@ def check_auth_info(): else: print('Please type your username below.') username = input() + config.set('scraper','user',username) if config.has_option('scraper','pwd'): pwd = config.get("scraper", "pwd") @@ -299,6 +355,7 @@ def check_auth_info(): else: print('Please type your password below.',colors.DANGER,'THE PASSWORD WILL BE SAVED AS PLAIN TEXT',colors.ENDC) pwd = input() + config.set('scraper','pwd',pwd) if config.has_option('scraper','root'): root = config.get("scraper", "root") @@ -311,6 +368,7 @@ def check_auth_info(): else: print('Please the type the url below.') baseurl = input() + config.set('scraper','baseurl',baseurl) session = 0 try: @@ -333,16 +391,10 @@ def check_auth_info(): print(colors.DANGER,'Uknown Error ocurred while loginng in, exiting...') sys.exit() - config.set('scraper','user',username) - config.set('scraper','pwd',pwd) config.set('scraper','root',root) - config.set('scraper','baseurl',baseurl) return session - - - def downloadCourse(session, name_course,link_course): @@ -364,7 +416,9 @@ def downloadCourse(session, name_course,link_course): with open(dst, 'wb') as f: f.write(soup.encode('utf-8')) for s in soup.find_all(class_='section main clearfix'): + #test_download_folder(session,s, path) downloadSection(session, s, path) + #print('Saved ',str(files.next()),' Files in ',str(sections.next()),' Sections') else: print('ERROR: ',str(r.status),' ',r.reason) @@ -400,17 +454,16 @@ def downloadCourse(session, name_course,link_course): if len(courses) == 0: print(colors.FAIL,'No courses found - Quitting!',colors.ENDC) exit_and_save() -else: - print(colors.WARNING,'Available Courses:',colors.ENDC) - for name in courses.keys(): - print('[',name,']: ',courses[name]) +#else: + 
#print(colors.WARNING,'Available Courses:',colors.ENDC) + #for name in courses.keys(): + #print('[',name,']: ',courses[name]) curses_list = check_courses_selected(courses) - for name,link in courses.items(): if name not in curses_list: continue From 70ab21e235d28b99a5dc9ad86ed895e8526e919b Mon Sep 17 00:00:00 2001 From: Miguel Date: Wed, 2 Jun 2021 03:44:14 +0100 Subject: [PATCH 4/4] Update readme.md --- readme.md | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/readme.md b/readme.md index 3b91b36..c74e119 100644 --- a/readme.md +++ b/readme.md @@ -53,21 +53,19 @@ Usage python scraper.py ``` -Follow the Dialog. +Follow the Dialog.
+To choose the settings again, just delete the variable in the file.
 
 Updates Made
 ----------
-- Fix to work with new versions of Moodle.
-- Fix to work with Python3.
-- Option to filter only some courses.
-- Remove semester scrapping option.
-- Added options to input the username, password and courses filter directly from the command prompt.
+- Fix to work with new versions of Moodle
+- Fix to work with Python3
+- Option to filter only some courses
+- Remove semester scraping option
+- Added options to input the username, password and courses filter directly from the command prompt
+- Download Folders
 
-Next Updates
-----------
-- Download files from folders.
-
 Disclaimer
 ----------