From 43b0ca1be1df11192711ae584263dc6520053e37 Mon Sep 17 00:00:00 2001 From: Darkhood148 Date: Tue, 5 Mar 2024 18:48:59 +0530 Subject: [PATCH] fix encoding issue in get_file_from_iso_fp --- pycdlib/pycdlib.py | 56 +++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/pycdlib/pycdlib.py b/pycdlib/pycdlib.py index 5545c7df..ee3bfa6e 100644 --- a/pycdlib/pycdlib.py +++ b/pycdlib/pycdlib.py @@ -495,7 +495,6 @@ def _find_dr_record_by_name(vd, path, encoding): return root_dir_record splitpath = utils.split_path(path) - currpath = splitpath.pop(0).decode('utf-8').encode(encoding) entry = root_dir_record @@ -518,7 +517,6 @@ def _find_dr_record_by_name(vd, path, encoding): index = lo if index != len(thelist) and thelist[index].file_ident == currpath: child = thelist[index] - if child is None: # We failed to find this component of the path, so break out of the # loop and fail. @@ -533,7 +531,6 @@ def _find_dr_record_by_name(vd, path, encoding): # We found the last child we are looking for; return it. if not splitpath: return child - if not child.is_dir(): break entry = child @@ -734,8 +731,8 @@ def _find_iso_record(self, iso_path, encoding='utf-8'): return _find_dr_record_by_name(self.pvd, iso_path, encoding) @lru_cache(maxsize=256) - def _find_rr_record(self, rr_path): - # type: (bytes) -> dr.DirectoryRecord + def _find_rr_record(self, rr_path, encoding='utf-8'): + # type: (bytes, str) -> dr.DirectoryRecord """ An internal method to find a directory record on the ISO given a Rock Ridge path. If the entry is found, it returns the directory record @@ -755,7 +752,7 @@ def _find_rr_record(self, rr_path): splitpath = utils.split_path(rr_path) - currpath = splitpath.pop(0).decode('utf-8').encode('utf-8') + currpath = splitpath.pop(0).decode('utf-8').encode(encoding) entry = root_dir_record @@ -806,13 +803,13 @@ def _find_rr_record(self, rr_path): if not child.is_dir(): break entry = child - currpath = splitpath.pop(0).decode('utf-8').encode('utf-8') + currpath = splitpath.pop(0).decode('utf-8').encode(encoding) raise pycdlibexception.PyCdlibInvalidInput('Could not find path') @lru_cache(maxsize=256) - def _find_joliet_record(self, joliet_path): - # type: (bytes) -> dr.DirectoryRecord + def _find_joliet_record(self, joliet_path, encoding='utf-16_be'): + # type: (bytes, str) -> dr.DirectoryRecord """ An internal method to find a directory record on the ISO given a Joliet path. If the entry is found, it returns the directory record object @@ -826,7 +823,7 @@ def _find_joliet_record(self, joliet_path): """ if self.joliet_vd is None: raise pycdlibexception.PyCdlibInternalError('Joliet path requested on non-Joliet ISO') - return _find_dr_record_by_name(self.joliet_vd, joliet_path, 'utf-16_be') + return _find_dr_record_by_name(self.joliet_vd, joliet_path, encoding) @lru_cache(maxsize=256) def _find_udf_record(self, udf_path): @@ -2425,8 +2422,8 @@ def _udf_get_file_from_iso_fp(self, outfp, blocksize, udf_path): utils.copy_data(data_len, blocksize, data_fp, outfp) def _get_file_from_iso_fp(self, outfp, blocksize, iso_path, rr_path, - joliet_path): - # type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes]) -> None + joliet_path, encoding=None): + # type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes], str) -> None """ An internal method to fetch a single file from the ISO and write it out to the file object. @@ -2446,13 +2443,16 @@ def _get_file_from_iso_fp(self, outfp, blocksize, iso_path, rr_path, if joliet_path is not None: if self.joliet_vd is None: raise pycdlibexception.PyCdlibInvalidInput('Cannot fetch a joliet_path from a non-Joliet ISO') - found_record = self._find_joliet_record(joliet_path) + encoding = encoding or 'utf-16_be' + found_record = self._find_joliet_record(joliet_path, encoding) elif rr_path is not None: if not self.rock_ridge: raise pycdlibexception.PyCdlibInvalidInput('Cannot fetch a rr_path from a non-Rock Ridge ISO') - found_record = self._find_rr_record(rr_path) + encoding = encoding or 'utf-8' + found_record = self._find_rr_record(rr_path, encoding) elif iso_path is not None: - found_record = self._find_iso_record(iso_path) + encoding = encoding or 'utf-8' + found_record = self._find_iso_record(iso_path, encoding) else: raise pycdlibexception.PyCdlibInternalError('Invalid path passed to get_file_from_iso_fp') @@ -3502,8 +3502,8 @@ def _get_iso_entry(self, iso_path, encoding='utf-8'): return self._find_iso_record(iso_path, encoding) - def _get_rr_entry(self, rr_path): - # type: (bytes) -> dr.DirectoryRecord + def _get_rr_entry(self, rr_path, encoding='utf-8'): + # type: (bytes, str) -> dr.DirectoryRecord """ Internal method to get the directory record for a Rock Ridge path. @@ -3516,10 +3516,10 @@ def _get_rr_entry(self, rr_path): if self._needs_reshuffle: self._reshuffle_extents() - return self._find_rr_record(rr_path) + return self._find_rr_record(rr_path, encoding) - def _get_joliet_entry(self, joliet_path): - # type: (bytes) -> dr.DirectoryRecord + def _get_joliet_entry(self, joliet_path, encoding='utf-16_be'): + # type: (bytes, str) -> dr.DirectoryRecord """ Internal method to get the directory record for a Joliet path. @@ -3532,7 +3532,7 @@ def _get_joliet_entry(self, joliet_path): if self._needs_reshuffle: self._reshuffle_extents() - return self._find_joliet_record(joliet_path) + return self._find_joliet_record(joliet_path, encoding) def _get_udf_entry(self, udf_path): # type: (str) -> udfmod.UDFFileEntry @@ -4199,6 +4199,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs): iso_path = None rr_path = None udf_path = None + encoding = None num_paths = 0 for key, value in kwargs.items(): if key == 'blocksize': @@ -4229,6 +4230,8 @@ def get_file_from_iso_fp(self, outfp, **kwargs): num_paths += 1 elif value is not None: raise pycdlibexception.PyCdlibInvalidInput('udf_path must be a string') + elif key == 'encoding': + encoding = value else: raise pycdlibexception.PyCdlibInvalidInput('Unknown keyword %s' % (key)) @@ -4239,7 +4242,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs): self._udf_get_file_from_iso_fp(outfp, blocksize, udf_path) else: self._get_file_from_iso_fp(outfp, blocksize, iso_path, rr_path, - joliet_path) + joliet_path, encoding) def get_and_write(self, iso_path, local_path, blocksize=8192): # type: (str, str, int) -> None @@ -5494,11 +5497,14 @@ def list_children(self, **kwargs): else: use_rr = False if 'joliet_path' in kwargs: - rec = self._get_joliet_entry(self._normalize_joliet_path(kwargs['joliet_path'])) + kwargs['encoding'] = kwargs['encoding'] or 'utf-16_be' + rec = self._get_joliet_entry(self._normalize_joliet_path(kwargs['joliet_path']), kwargs['encoding']) elif 'rr_path' in kwargs: - rec = self._get_rr_entry(utils.normpath(kwargs['rr_path'])) + kwargs['encoding'] = kwargs['encoding'] or 'utf-8' + rec = self._get_rr_entry(utils.normpath(kwargs['rr_path']), kwargs['encoding']) use_rr = True else: + kwargs['encoding'] = kwargs['encoding'] or 'utf-8' rec = self._get_iso_entry(utils.normpath(kwargs['iso_path']), kwargs['encoding']) for c in _yield_children(rec, use_rr): @@ -5925,7 +5931,7 @@ def walk(self, **kwargs): filelist = [] dirdict = {} - for child in reversed(list(self.list_children(**{path_type: relpath, 'encoding': kwargs['encoding']}))): + for child in reversed(list(self.list_children(**{path_type: relpath, 'encoding': kwargs.get('encoding', None)}))): if child is None or child.is_dot() or child.is_dotdot(): continue