Skip to content

Commit

Permalink
improved file type handling for edge cases and bumping to v3.6.0
Browse files Browse the repository at this point in the history
  • Loading branch information
shaneholloman committed Oct 1, 2024
1 parent 570486b commit 749dae4
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/codemapper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@
creating detailed Markdown documentation of their structure and contents.
"""

- __version__ = "3.5.3"
+ __version__ = "3.6.0"

# Any other necessary imports or package-level code can go here
19 changes: 18 additions & 1 deletion src/codemapper/codemapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,11 +385,28 @@ def is_large_file(file_path: str) -> bool:
bool: True if the file is considered large or binary, False otherwise.
"""
_, ext = os.path.splitext(file_path.lower())
file_name = os.path.basename(file_path)

# Check if it's in LARGE_FILE_EXTENSIONS
if ext in LARGE_FILE_EXTENSIONS:
return True

# Check if it's in CODE_FENCE_MAP
if ext in CODE_FENCE_MAP or file_name in CODE_FENCE_MAP:
return False

# Fallback to MIME type check
mime_type, _ = mimetypes.guess_type(file_path)
- return bool(mime_type) and not mime_type.startswith("text")
if mime_type:
# List of MIME types that are considered text-based
text_mime_types = [
"text/", "application/json", "application/javascript", "application/xml",
"application/x-httpd-php", "application/x-sh", "application/x-csh"
]
return not any(mime_type.startswith(text_type) for text_type in text_mime_types)

# If MIME type couldn't be determined, assume it's not a large file
return False


def get_file_info(file_path: str) -> str:
Expand Down

0 comments on commit 749dae4

Please sign in to comment.