Skip to content

Commit

Permalink
fix bugs
Browse files Browse the repository at this point in the history
- escape quotes (SQL: single quotes delimit string constants; double quotes are for identifiers. 🤔😮 Escape a single quote by doubling it: `''`)
- all fields could be NULL
- update license text
- wait for codezinger to load all the folders, otherwise program reads it as 0
- safely retrieve elements, allowing for the possibility that the data might not exist
  • Loading branch information
RoguedBear committed Aug 28, 2021
1 parent 960c3b1 commit c4fef91
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 14 deletions.
5 changes: 3 additions & 2 deletions FileCreation.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,12 @@ def dataInsertion(Data: list):
fp.write("Insert into labreport\nvalues\n")

for COUNT, data in enumerate(Data):
problem_desc = data.get("problem_desc")
problem_desc = data.get("problem_desc").replace("'", "''")
assign_date = data.get("assigned_date")
due_date = data.get("submission_date")
due_date = f"\'{due_date}\'" if due_date != "NULL" else due_date
mission_status = 1 if data.get("status") else 0
fp.write(f"\t({COUNT + 1},'{problem_desc}', {assign_date}, '{due_date}', {mission_status})")
fp.write(f"\t({COUNT + 1},'{problem_desc}', {assign_date}, {due_date}, {mission_status})")
if COUNT != len(Data) - 1: fp.write(",\n")
else: fp.write("\n")
fp.write(";")
Expand Down
4 changes: 2 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ def driver_exists():
driver_exists()
driver = webdriver.Chrome()
print("""DB-Hax Copyright (C) 2021 RoguedBear, Ya-s-h
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This program comes with ABSOLUTELY NO WARRANTY; see COPYING
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.""")
under certain conditions; see COPYING""")
try:
main(driver)
except Exception as ex:
Expand Down
39 changes: 29 additions & 10 deletions scrap_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,9 @@ def login_codezinger(driver: webdriver.Chrome, username: str = "", password: str


def expand_all_labs(driver: webdriver.Chrome):
buttons = driver.find_elements_by_xpath(FOLDER_BUTTON_XPATH)
buttons = []
while not buttons:
buttons = driver.find_elements_by_xpath(FOLDER_BUTTON_XPATH)
# print(buttons)
for button in buttons:
button.click()
Expand All @@ -88,29 +90,46 @@ def get_data(driver: webdriver.Chrome) -> List[dict]:
data_list: List[dict, ...] = []
questions = driver.find_elements_by_xpath(QUESTIONS_XPATH)

print()
question: selenium.webdriver.remote.webelement.WebElement
for question in questions:
problem_no = question.find_element_by_xpath(PROBLEM_NUMBER_XPATH).text
question_title = question.find_element_by_xpath(QUESTION_TITLE_XPATH).text
due_date = question.find_element_by_xpath(DUE_DATE_XPATH).text.rstrip(" /-")
status = question.find_element_by_xpath(STATUS_XPATH).text
for index, question in enumerate(questions):
print("Processing data... ({:3.0%})".format(index/len(questions)), end="\r")

parsed_date: datetime = datetime.strptime(due_date, "%d %b %I:%M %p")
parsed_date = parsed_date.replace(year=datetime.now().year)
problem_no = safe_find_element_by_xpath(question, PROBLEM_NUMBER_XPATH)
question_title = safe_find_element_by_xpath(question, QUESTION_TITLE_XPATH)
due_date = safe_find_element_by_xpath(question, DUE_DATE_XPATH).rstrip(" /-")
status = safe_find_element_by_xpath(question, STATUS_XPATH)

try:
parsed_date = datetime.strptime(due_date, "%d %b %I:%M %p")
parsed_date = parsed_date.replace(year=datetime.now().year)
parsed_date = parsed_date.isoformat()
except ValueError:
parsed_date = "NULL"

data = {
"problem_desc": problem_no + " " + question_title,
"assigned_date": "NULL",
"submission_date": parsed_date.isoformat(),
"status": status == "Submitted"
"submission_date": parsed_date,
"status": status == "Submitted" if status != "NULL" else status
}
data_list.append(data)
print()

print("Scraped", len(questions), "questions")

return data_list


def safe_find_element_by_xpath(element: selenium.webdriver.remote.webelement.WebElement, xpath: str) -> str:
    """Return the text of the child of *element* matched by *xpath*.

    If the lookup raises ``NoSuchElementException``, the literal string
    ``"NULL"`` is returned instead of propagating the error.
    """
    try:
        return element.find_element_by_xpath(xpath).text
    except NoSuchElementException:
        # Missing data is represented by the "NULL" placeholder string.
        return "NULL"


if __name__ == '__main__':
driver = webdriver.Chrome()
try:
Expand Down

0 comments on commit c4fef91

Please sign in to comment.