Skip to content

Commit

Permalink
Merge pull request #363 from DESm1th/tech_note_fix
Browse files: browse the repository at this point in the history
[FIX] Find non-pdf tech notes
  • Loading branch information
DESm1th authored Aug 15, 2024
2 parents db9b603 + e4de283 commit d1e7ed9
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 20 deletions.
42 changes: 26 additions & 16 deletions datman/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1334,11 +1334,7 @@ def make_zip(source_dir, dest_zip):


def find_tech_notes(folder):
"""Find any technotes located within a folder.
If only one PDF is found it is assumed to be the tech notes. If multiple
are found, unless one contains the string 'TechNotes', the first pdf is
guessed to be the tech notes.
"""Find any technotes located within a given folder.
Args:
folder (str): A full path to a folder to search.
Expand All @@ -1347,21 +1343,35 @@ def find_tech_notes(folder):
path (str): The full path to the tech notes or an empty string if
none have been found.
"""
pdf_list = []
for root, dirs, files in os.walk(folder):
exts = ["pdf", "png", "jpg"]
notes = []
for root, _, files in os.walk(folder):
for fname in files:
if ".pdf" in fname:
pdf_list.append(os.path.join(root, fname))
if any([fname.endswith(ext) for ext in exts]):
notes.append(os.path.join(root, fname))

if not notes:
return ""

if not pdf_list:
# find the file most likely to be the tech notes
scored = []
for item in notes:
score = 0
if "tech" in item.lower():
score += 3
if "note" in item.lower():
score += 2
if item.endswith("pdf"):
score += 1
scored.append((item, score))

result = sorted(scored, key=lambda x: x[1], reverse=True)

if result[0][1] == 0:
# No files scored as likely to be the notes
return ""
elif len(pdf_list) > 1:
for pdf in pdf_list:
file_name = os.path.basename(pdf)
if 'technotes' in file_name.lower():
return pdf

return pdf_list[0]
return result[0][0]


def read_json(path):
Expand Down
44 changes: 40 additions & 4 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ def test_catches_invalid_site_in_kcni_id(self, dm_config):

class FindTechNotes(unittest.TestCase):
notes = "TechNotes.pdf"
jpg_notes = "TechNotes.jpg"
other_pdf1 = "SomeFile.pdf"
other_pdf2 = "otherFile.pdf"
path = "./resources"
Expand All @@ -136,7 +137,7 @@ def test_doesnt_crash_with_broken_path(self):
@patch('os.walk', autospec=True)
def test_doesnt_crash_when_no_tech_notes_exist(self, mock_walk):
mock_walk.return_value = self.__mock_file_system(
randint(1, 10), add_notes=False)
randint(1, 10), add_pdf_notes=False)

found_file = utils.find_tech_notes(self.path)

Expand All @@ -163,20 +164,55 @@ def test_returns_tech_notes_when_multiple_pdfs_present(self, mock_walk):
def test_first_file_returned_when_multiple_pdfs_but_no_tech_notes(
self, mock_walk):
mock_walk.return_value = self.__mock_file_system(
randint(1, 10), add_notes=False, add_pdf=True)
randint(1, 10), add_pdf_notes=False, add_pdf=True)

found_file = utils.find_tech_notes(self.path)

assert os.path.basename(found_file) == self.other_pdf1

def __mock_file_system(self, depth, add_notes=True, add_pdf=False):
@patch('os.walk', autospec=True)
def test_finds_non_pdf_tech_notes(self, mock_walk):
mock_walk.return_value = self.__mock_file_system(
randint(1, 10), add_pdf_notes=False, add_pdf=True,
add_jpg_notes=True)

found_file = utils.find_tech_notes(self.path)

assert os.path.basename(found_file) == self.jpg_notes

@patch('os.walk', autospec=True)
def test_doesnt_pick_similarly_named_file(self, mock_walk):
mock_walk.return_value = self.__mock_file_system(
randint(1, 10), add_pdf_notes=False, add_pdf=True,
add_jpg_notes=True, add_jpgs=True)

found_file = utils.find_tech_notes(self.path)

assert os.path.basename(found_file) == self.jpg_notes

@patch('os.walk', autospec=True)
def test_prefers_pdf_notes_over_other_formats(self, mock_walk):
mock_walk.return_value = self.__mock_file_system(
randint(1, 10), add_pdf_notes=True, add_jpg_notes=True,
add_pdf=True, add_jpgs=True)

found_file = utils.find_tech_notes(self.path)

assert os.path.basename(found_file) == self.notes

def __mock_file_system(self, depth, add_pdf_notes=True, add_jpgs=False,
add_jpg_notes=False, add_pdf=False):
walk_list = []
cur_path = self.path
file_list = ["file1.txt", "file2"]
if add_pdf:
file_list.extend([self.other_pdf1, self.other_pdf2])
if add_notes:
if add_jpg_notes:
file_list.extend([self.jpg_notes])
if add_pdf_notes:
file_list.append(self.notes)
if add_jpgs:
file_list.extend(['SpiralView.jpg', 'RANotes.jpg'])
for num in range(1, depth + 1):
cur_path = cur_path + "/dir{}".format(num)
dirs = ("dir{}".format(num + 1), )
Expand Down

0 comments on commit d1e7ed9

Please sign in to comment.