Skip to content

Commit

Permalink
when indexing for search, trust get_content_as_text
Browse files Browse the repository at this point in the history
otherwise we're checking for PDFs in multiple places
  • Loading branch information
longhotsummer committed Sep 3, 2023
1 parent d318ad6 commit 7143c77
Showing 1 changed file with 9 additions and 12 deletions.
21 changes: 9 additions & 12 deletions peachjam_search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,18 +245,15 @@ def prepare_order_outcome(self, instance):
def prepare_pages(self, instance):
"""Text content of pages extracted from PDF."""
# NOTE(review): this is a rendered diff hunk, not runnable code. The +/- markers
# and the indentation were lost, so the removed and the added versions of the
# body appear back to back. The split marked below is inferred from the hunk
# header (18 old lines -> 15 new lines) and the "9 additions & 12 deletions"
# summary -- confirm against the real commit before relying on it.
#
# Context line kept by the commit: the body only runs when content_html is falsy.
if not instance.content_html:
# --- Removed by this commit (12 lines) ---
# Old behaviour: pages were only built when the instance had a source_file
# attribute whose filename ended in ".pdf".
if hasattr(
instance, "source_file"
) and instance.source_file.filename.endswith(".pdf"):
text = instance.get_content_as_text()
page_texts = text.split("\x0c")
pages = []
for i, page in enumerate(page_texts):
i = i + 1
page = page.strip()
if page:
pages.append({"page_num": i, "body": page})
return pages
# --- Added by this commit (9 lines) ---
# New behaviour: the same page-splitting body, without the source_file/.pdf
# guard; whatever get_content_as_text() returns is used directly.
text = instance.get_content_as_text()
# "\x0c" is the form-feed character; the text is split on it into pages.
page_texts = text.split("\x0c")
pages = []
for i, page in enumerate(page_texts):
# Shift the 0-based enumerate index so page numbers start at 1.
i = i + 1
page = page.strip()
# Pages that are empty after stripping are left out, but they still consume
# a page number, so numbering follows the position in the split text.
if page:
pages.append({"page_num": i, "body": page})
return pages

def prepare_taxonomies(self, instance):
"""Taxonomy topics are stored as slugs of all the items in the tree down to that topic. This is easier than
Expand Down

0 comments on commit 7143c77

Please sign in to comment.