From 749dae4386642fdf54d72e2612e71788f7be0bfd Mon Sep 17 00:00:00 2001 From: Shane Holloman Date: Wed, 2 Oct 2024 04:07:24 +1300 Subject: [PATCH] improved file type handling for edge cases and bumping to v3.6.0 --- src/codemapper/__init__.py | 2 +- src/codemapper/codemapper.py | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/codemapper/__init__.py b/src/codemapper/__init__.py index 927ec4c..319c747 100644 --- a/src/codemapper/__init__.py +++ b/src/codemapper/__init__.py @@ -6,6 +6,6 @@ creating detailed Markdown documentation of their structure and contents. """ -__version__ = "3.5.3" +__version__ = "3.6.0" # Any other necessary imports or package-level code can go here diff --git a/src/codemapper/codemapper.py b/src/codemapper/codemapper.py index 4d46bcf..aaa789d 100644 --- a/src/codemapper/codemapper.py +++ b/src/codemapper/codemapper.py @@ -385,11 +385,28 @@ def is_large_file(file_path: str) -> bool: bool: True if the file is considered large or binary, False otherwise. """ _, ext = os.path.splitext(file_path.lower()) + file_name = os.path.basename(file_path) + + # Check if it's in LARGE_FILE_EXTENSIONS if ext in LARGE_FILE_EXTENSIONS: return True + # Check if it's in CODE_FENCE_MAP + if ext in CODE_FENCE_MAP or file_name in CODE_FENCE_MAP: + return False + + # Fallback to MIME type check mime_type, _ = mimetypes.guess_type(file_path) - return bool(mime_type) and not mime_type.startswith("text") + if mime_type: + # List of MIME types that are considered text-based + text_mime_types = [ + "text/", "application/json", "application/javascript", "application/xml", + "application/x-httpd-php", "application/x-sh", "application/x-csh" + ] + return not any(mime_type.startswith(text_type) for text_type in text_mime_types) + + # If MIME type couldn't be determined, assume it's not a large file + return False def get_file_info(file_path: str) -> str: