Skip to content

Commit

Permalink
Merge pull request #44 from nexB/posix-safe-filename
Browse files Browse the repository at this point in the history
Make safe filename safe to use on POSIX
  • Loading branch information
pombredanne authored Aug 24, 2022
2 parents d3eed9a + b88d65a commit c31bc0e
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 24 deletions.
12 changes: 8 additions & 4 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
Release notes
=============

Version (next)
Version (next)
------------------------------

TBD.
- Add ``posix_only`` option to ``commoncode.paths.portable_filename`` and
``commoncode.paths.safe_path``. When this option is enabled, these functions
no longer rename filenames or replace punctuation characters that are valid
on POSIX operating systems but invalid on Windows.

Version 31.0.0 - (2022-05-16)
------------------------------
Expand Down Expand Up @@ -50,7 +54,7 @@ This is a major version with API-breaking changes in the resource module.
otherwise missing from files path list.
In particular this behaviour changed when you create a VirtualCodebase from
a previous Codebase created with a "full_root" argument. Previously, the
missing paths of a "full_root" Codebase were kept unchanged.
missing paths of a "full_root" Codebase were kept unchanged.
Note that the VirtualCodebase has always ignored the "full_root" argument.

- The Codebase and VirtualCodebase are now iterable. Iterating on a codebase
Expand Down Expand Up @@ -80,7 +84,7 @@ Other changes:

- Remove Python upper version limit.
- Merge latest skeleton
- fileutils.parent_directory() now accepts a "with_trail" argument.
- fileutils.parent_directory() now accepts a "with_trail" argument.
The returned directory has a trailing path separator unless with_trail is False.
The default is True and the default behaviour is unchanged.

Expand Down
63 changes: 43 additions & 20 deletions src/commoncode/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
# Build OS-portable and safer paths


def safe_path(path, posix=False, preserve_spaces=False):
def safe_path(path, posix=False, preserve_spaces=False, posix_only=False):
"""
Convert `path` to a safe and portable POSIX path usable on multiple OSes.
The returned path is an ASCII-only byte string, resolved for relative
Expand All @@ -52,7 +52,13 @@ def safe_path(path, posix=False, preserve_spaces=False):
_pathmod, path_sep = path_handlers(path, posix)

segments = [s.strip() for s in path.split(path_sep) if s.strip()]
segments = [portable_filename(s, preserve_spaces=preserve_spaces) for s in segments]
segments = [
portable_filename(
s,
preserve_spaces=preserve_spaces,
posix_only=posix_only
) for s in segments
]

if not segments:
return '_'
Expand Down Expand Up @@ -134,17 +140,34 @@ def resolve(path, posix=True):
return path


legal_punctuation = r"!\#$%&\(\)\+,\-\.;\=@\[\]_\{\}\~"
legal_spaces = r" "
legal_chars = r'A-Za-z0-9' + legal_punctuation
legal_punctuation = r'!\#$%&\(\)\+,\-\.;\=@\[\]_\{\}\~'
legal_spaces = r' '
legal_alphanumeric = r'A-Za-z0-9'
legal_chars = legal_alphanumeric + legal_punctuation
legal_chars_inc_spaces = legal_chars + legal_spaces
illegal_chars_re = r'[^' + legal_chars + r']'
illegal_chars_exc_spaces_re = r'[^' + legal_chars_inc_spaces + r']'
replace_illegal_chars = re.compile(illegal_chars_re).sub
replace_illegal_chars_exc_spaces = re.compile(illegal_chars_exc_spaces_re).sub


def portable_filename(filename, preserve_spaces=False):
posix_legal_punctuation = r'<:"/>\|\*\^\\\'`\?' + legal_punctuation
posix_legal_chars = legal_alphanumeric + posix_legal_punctuation
posix_legal_chars_inc_spaces = posix_legal_chars + legal_spaces
posix_illegal_chars_re = r'[^' + posix_legal_chars + r']'
posix_illegal_chars_exc_spaces_re = r'[^' + posix_legal_chars_inc_spaces + r']'
replace_illegal_posix_chars = re.compile(posix_illegal_chars_re).sub
replace_illegal_posix_chars_exc_spaces = re.compile(posix_illegal_chars_exc_spaces_re).sub


ILLEGAL_WINDOWS_NAMES = set([
'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
'aux', 'con', 'nul', 'prn'
])


def portable_filename(filename, preserve_spaces=False, posix_only=False):
"""
Return a new name for `filename` that is portable across operating systems.
Expand All @@ -170,22 +193,21 @@ def portable_filename(filename, preserve_spaces=False):
if not filename:
return '_'

if preserve_spaces:
filename = replace_illegal_chars_exc_spaces('_', filename)
if posix_only:
if preserve_spaces:
filename = replace_illegal_posix_chars_exc_spaces('_', filename)
else:
filename = replace_illegal_posix_chars('_', filename)
else:
filename = replace_illegal_chars('_', filename)

# these are illegal both upper and lowercase and with or without an extension
# we insert an underscore after the base name.
windows_illegal_names = set([
'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
'aux', 'con', 'nul', 'prn'
])
if preserve_spaces:
filename = replace_illegal_chars_exc_spaces('_', filename)
else:
filename = replace_illegal_chars('_', filename)

basename, dot, extension = filename.partition('.')
if basename.lower() in windows_illegal_names:
filename = ''.join([basename, '_', dot, extension])
if not posix_only:
basename, dot, extension = filename.partition('.')
if basename.lower() in ILLEGAL_WINDOWS_NAMES:
filename = ''.join([basename, '_', dot, extension])

# no name made only of dots.
if set(filename) == set(['.']):
Expand All @@ -198,6 +220,7 @@ def portable_filename(filename, preserve_spaces=False):

return filename


#
# paths comparisons, common prefix and suffix extraction
#
Expand Down
26 changes: 26 additions & 0 deletions tests/test_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,14 @@ def test_safe_path_posix_style_many_dots(self):
expected = 'dotdot/dotdot/dotdot/webform.components.inc'
assert test == expected

def test_safe_path_posix_only(self):
test_path = 'var/lib/dpkg/info/libgsm1:amd64.list'
test = paths.safe_path(test_path)
expected = 'var/lib/dpkg/info/libgsm1_amd64.list'
assert test == expected
test = paths.safe_path(test_path, posix_only=True)
assert test == test_path

def test_resolve_mixed_slash(self):
test = paths.resolve('C:\\..\\./drupal.js')
expected = 'C/drupal.js'
Expand Down Expand Up @@ -140,6 +148,24 @@ def test_portable_filename(self):
expected = 'This_contain_UMLAUT_umlauts.txt'
assert paths.portable_filename(u'This contain UMLAUT \xfcml\xe4uts.txt') == expected

# Check to see if illegal Windows filenames are properly handled
for illegal_window_name in paths.ILLEGAL_WINDOWS_NAMES:
# Rename files with names that are illegal on Windows
expected = f'{illegal_window_name}_'
assert paths.portable_filename(illegal_window_name) == expected

# Allow files with names that are illegal on Windows
assert paths.portable_filename(illegal_window_name, posix_only=True) == illegal_window_name

# Check that punctuation characters that are illegal in Windows filenames
# are kept when posix_only is True and replaced with "_" otherwise
for valid_posix_path_char in paths.posix_legal_punctuation:
test_name = f'test{valid_posix_path_char}'
assert paths.portable_filename(test_name, posix_only=True) == test_name
if valid_posix_path_char not in paths.legal_punctuation:
expected = f'test_'
assert paths.portable_filename(test_name) == expected


class TestCommonPath(TestCase):

Expand Down

0 comments on commit c31bc0e

Please sign in to comment.