Skip to content

Commit

Permalink
Move import functions into helper file so they are re-usable by other…
Browse files Browse the repository at this point in the history
… apps for file imports.

- The empty-rows check (check_empty_rows()) now runs for assessment imports, content page imports, and any other CMS import that uses the parse_file function.
- We cannot move the snake_case conversion into fix_rows() in import_helpers.py because contentset imports use PascalCase headers.
If we snake_cased the headers of every import, contentset imports would raise key errors.
  • Loading branch information
DevChima committed Feb 17, 2025
1 parent 10bd8b2 commit c103c9a
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 18 deletions.
27 changes: 12 additions & 15 deletions home/import_assessments.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,14 @@
from wagtail.coreutils import get_content_languages # type: ignore
from wagtail.models import Locale, Page # type: ignore

from home.import_helpers import ImportException, parse_file, validate_using_form
from home.import_helpers import (
ImportException,
convert_headers_to_snake_case,
validate_using_form,
)
from home.import_helpers import (
parse_file as helper_parse_file,
)
from home.models import Assessment, ContentPage, HomePage # type: ignore

AssessmentId = tuple[str, Locale]
Expand Down Expand Up @@ -106,21 +113,13 @@ def parse_file(self) -> list["AssessmentRow"]:
c. Validates that the snake_case headers contain all mandatory headers.
d. Transforms each row to use snake_case headers.
"""

row_iterator = parse_file(self.file_content, self.file_type)
row_iterator = helper_parse_file(self.file_content, self.file_type)
rows = [row for _, row in row_iterator]

if not rows:
raise ImportAssessmentException(
"The import file is empty or contains no valid rows.", row_num=1
)

original_headers = rows[0].keys()
headers_mapping = {
header: self.to_snake_case(header) for header in original_headers
}
headers_mapping = convert_headers_to_snake_case(list(original_headers))
snake_case_headers = list(headers_mapping.values())
self.validate_headers(snake_case_headers, MANDATORY_HEADERS, row_num=1)
self.validate_headers(snake_case_headers, row_num=1)
transformed_rows = [
{headers_mapping[key]: value for key, value in row.items()} for row in rows
]
Expand Down Expand Up @@ -214,9 +213,7 @@ def create_shadow_assessment_from_row(
)
assessment.questions.append(question)

def validate_headers(
self, headers: list[str], MANDATORY_HEADERS: list[str], row_num: int
) -> None:
def validate_headers(self, headers: list[str], row_num: int) -> None:
missing_headers = [
header for header in MANDATORY_HEADERS if header not in headers
]
Expand Down
31 changes: 30 additions & 1 deletion home/import_helpers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# The error messages are processed and parsed into a list of messages we return to the user
import csv
import re
from collections.abc import Iterator
from datetime import datetime
from io import BytesIO, StringIO
Expand Down Expand Up @@ -165,6 +166,30 @@ def extract_errors(data: dict[str | int, Any] | list[str]) -> dict[str, str]:
return error_message


def check_empty_rows(rows: list[dict[str, Any]], row_num: int) -> None:
    """
    Raise an ImportException if the list of parsed rows is empty.

    Args:
        rows: The rows read from the import file.
        row_num: The row number to report on the raised exception.

    Raises:
        ImportException: If ``rows`` is empty.
    """
    if not rows:
        raise ImportException(
            "The import file is empty or contains no valid rows.", row_num=row_num
        )


def convert_headers_to_snake_case(headers: list[str]) -> dict[str, str]:
    """
    Build a mapping from each original header to its snake_case form.

    Args:
        headers: The header names as they appear in the import file.

    Returns:
        A dict keyed by the original header, with the value produced by
        ``to_snake_case`` for that header. A header that appears more than
        once in ``headers`` yields a single entry.
    """
    return {header: to_snake_case(header) for header in headers}


def to_snake_case(s: str) -> str:
    """
    Convert a string to snake_case.

    Every run of non-word characters and/or underscores is collapsed into a
    single underscore, the result is lowercased, and leading and trailing
    underscores are removed. Case boundaries are not split, so "PascalCase"
    becomes "pascalcase" rather than "pascal_case".

    Args:
        s: The string to convert.

    Returns:
        The snake_case version of ``s``; an empty string if ``s`` contains no
        word characters other than underscores.
    """
    return re.sub(r"[\W_]+", "_", s).lower().strip("_")


def fix_rows(rows: Iterator[dict[str | Any, Any]]) -> Iterator[dict[str, str | None]]:
"""
Fix keys for all rows by lowercasing keys and removing whitespace from keys and values
Expand Down Expand Up @@ -210,7 +235,11 @@ def parse_file(
file_content: bytes, file_type: str
) -> Iterator[tuple[int, dict[str, Any]]]:
read_rows = read_xlsx if file_type == "XLSX" else read_csv
return enumerate(fix_rows(read_rows(file_content)), start=2)
rows = list(fix_rows(read_rows(file_content)))

check_empty_rows(rows, row_num=1)

return enumerate(rows, start=2)


def read_csv(file_content: bytes) -> Iterator[dict[str, Any]]:
Expand Down
6 changes: 4 additions & 2 deletions home/tests/test_assessment_import_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -706,9 +706,11 @@ def test_empty_rows(self, csv_impexp: ImportExport) -> None:
Importing an empty CSV should return an error that the
import file has no rows.
"""
with pytest.raises(ImportAssessmentException) as e:
with pytest.raises(ImportException) as e:
csv_impexp.import_file("empty.csv")
assert e.value.message == "The import file is empty or contains no valid rows."
assert e.value.message == [
"The import file is empty or contains no valid rows."
]
assert e.value.row_num == 1


Expand Down

0 comments on commit c103c9a

Please sign in to comment.