Skip to content

Commit

Permalink
Removed the dirlist host from the paths returned by various calls
Browse files Browse the repository at this point in the history
Also created a work-around for isdir and isfile
  • Loading branch information
turetske committed May 21, 2024
1 parent 9f7be59 commit c969f84
Showing 1 changed file with 63 additions and 23 deletions.
86 changes: 63 additions & 23 deletions src/pelicanfs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ def _dirlist_dec(func):
Decorator function which, when given a namespace location, get the url for the dirlist location from the headers
and uses that url for the given function.
This is for functions which need to list information in the origin directories such as "find", "isdir", "ls"
This is for functions which need to get information from origin directories "ls", "du", "info", etc.
"""
async def wrapper(self, *args, **kwargs):
path = self._check_fspath(args[0])
Expand All @@ -334,17 +334,67 @@ async def wrapper(self, *args, **kwargs):
return result
return wrapper


def _remove_dirlist_from_path(self, path):
parsed_url = urllib.parse.urlparse(path)
updated_url = parsed_url._replace(netloc="", scheme="")
return urllib.parse.urlunparse(updated_url)

def _remove_dirlist_from_paths(self, paths):
if isinstance(paths, list):
return [self._remove_dirlist_from_paths(path) for path in paths]

if isinstance(paths, dict):
if 'name' in paths:
path = paths['name']
paths['name'] = self._remove_dirlist_from_path(path)
if 'url' in paths:
url = paths['url']
paths['url'] = self._remove_dirlist_from_path(url)
return paths
else:
new_dict = {}
for key, item in paths.items():
new_key = self._remove_dirlist_from_path(key)
new_item = self._remove_dirlist_from_paths(item)
new_dict[new_key] = new_item
return new_dict

if isinstance(paths, str):
return self._remove_dirlist_from_path(paths)

return paths

def _dirlist_dec(func):
"""
Decorator function which, when given a namespace location, get the url for the dirlist location from the headers
and uses that url for the given function. It then normalizes the paths or list of paths returned by the function
This is for functions which need to retrieve information from origin directories such as "find", "ls", "info", etc.
"""
async def wrapper(self, *args, **kwargs):
path = self._check_fspath(args[0])
dataUrl = await self.get_dirlist_url(path)
return await func(self, dataUrl, *args[1:], **kwargs)
return wrapper


@_dirlist_dec
async def _ls(self, path, detail=True, **kwargs):
    """
    List ``path`` through the wrapped HTTP filesystem and return the listing
    with the scheme and host stripped from every returned path.

    ``detail`` and any extra keyword arguments are forwarded unchanged to
    ``self.httpFileSystem._ls``.
    """
    # The raw listing must be captured first; returning it directly would
    # skip the normalization step below.
    results = await self.httpFileSystem._ls(path, detail, **kwargs)
    return self._remove_dirlist_from_paths(results)

@_dirlist_dec
async def _isdir(self, path):
    """Report whether ``path`` is a directory, as answered by the wrapped HTTP filesystem."""
    is_directory = await self.httpFileSystem._isdir(path)
    return is_directory

@_dirlist_dec
async def _find(self, path, maxdepth=None, withdirs=False, **kwargs):
    """
    Search below ``path`` through the wrapped HTTP filesystem and return the
    result with the scheme and host stripped from every returned path.

    ``maxdepth``, ``withdirs`` and any extra keyword arguments are forwarded
    unchanged to ``self.httpFileSystem._find``.
    """
    # The raw result must be captured first; returning it directly would
    # skip the normalization step below.
    results = await self.httpFileSystem._find(path, maxdepth, withdirs, **kwargs)
    return self._remove_dirlist_from_paths(results)

async def _isfile(self, path):
return not await self._isdir(path)

async def _glob(self, path, maxdepth=None, **kwargs):
"""
Expand Down Expand Up @@ -404,25 +454,18 @@ async def _glob(self, path, maxdepth=None, **kwargs):
root, maxdepth=depth, withdirs=True, detail=True, **kwargs
)

pattern = glob_translate(path + ("/" if ends_with_slash else ""))
pattern = glob_translate(self._remove_dirlist_from_path(path) + ("/" if ends_with_slash else ""))
pattern = re.compile(pattern)

allpaths_cleaned = {}
for p, info in allpaths.items():
parsed = list(urllib.parse.urlparse(p))
parsed[2] = re.sub("/{2,}", "/", parsed[2])
cleaned = urllib.parse.urlunparse(parsed)
allpaths_cleaned[cleaned] = info

out = {
(
p.rstrip("/")
self._remove_dirlist_from_path(p.rstrip("/"))
if not append_slash_to_dirname
and info["type"] == "directory"
and p.endswith("/")
else p
else self._remove_dirlist_from_path(p)
): info
for p, info in sorted(allpaths_cleaned.items())
for p, info in sorted(allpaths.items())
if pattern.match(p.rstrip("/"))
}

Expand All @@ -434,7 +477,8 @@ async def _glob(self, path, maxdepth=None, **kwargs):

@_dirlist_dec
async def _info(self, path, **kwargs):
    """
    Fetch the info record for ``path`` from the wrapped HTTP filesystem and
    return it with the scheme and host stripped from its paths.

    Extra keyword arguments are forwarded unchanged to
    ``self.httpFileSystem._info``.
    """
    # The raw record must be captured first; returning it directly would
    # skip the normalization step below.
    results = await self.httpFileSystem._info(path, **kwargs)
    return self._remove_dirlist_from_paths(results)

@_dirlist_dec
async def _du(self, path, total=True, maxdepth=None, **kwargs):
Expand All @@ -445,7 +489,7 @@ async def _walk(self, path, maxdepth=None, on_error="omit", **kwargs):
path = self._check_fspath(path)
listUrl = await self.get_dirlist_url(path)
async for _ in self.httpFileSystem._walk(listUrl, maxdepth, on_error, **kwargs):
yield _
yield self._remove_dirlist_from_path(_)

def _io_wrapper(self, func):
"""
Expand Down Expand Up @@ -496,10 +540,10 @@ def _check_fspath(self, path: str) -> str:
path = pelican_url.path
return path

def open(self, path, mode="rb", **kwargs):
    """
    Open ``path`` for reading through the wrapped HTTP filesystem.

    The path is validated with ``self._check_fspath`` and then resolved to a
    data URL: via ``self.get_origin_cache`` when ``self.directReads`` is set,
    otherwise via ``self.get_working_cache``.

    ``mode`` is passed positionally to ``self.httpFileSystem.open``. It
    defaults to ``"rb"`` so that callers which pass only a path keep working.
    NOTE(review): ``"rb"`` is chosen to match fsspec's own ``open`` default -
    confirm. Extra keyword arguments are forwarded unchanged.

    Returns the file object from the wrapped filesystem, with its ``read``
    method wrapped by ``self._io_wrapper``.
    """
    path = self._check_fspath(path)
    resolver = self.get_origin_cache if self.directReads else self.get_working_cache
    data_url = sync(self.loop, resolver, path)
    fp = self.httpFileSystem.open(data_url, mode, **kwargs)
    fp.read = self._io_wrapper(fp.read)
    return fp

Expand Down Expand Up @@ -587,10 +631,6 @@ async def _cat_file(self, path, start=None, end=None, **kwargs):
async def _exists(self, path, **kwargs):
return await self.httpFileSystem._exists(path, **kwargs)

@_cache_dec
async def _isfile(self, path, **kwargs):
    """
    Ask the wrapped HTTP filesystem whether ``path`` is a file.

    Extra keyword arguments are forwarded unchanged to
    ``self.httpFileSystem._isfile``.
    """
    # NOTE(review): an ``_isfile(self, path)`` built on ``_isdir`` also appears
    # earlier in this text; if both live in the same class, this later
    # definition is the one that takes effect - confirm which is intended.
    is_regular_file = await self.httpFileSystem._isfile(path, **kwargs)
    return is_regular_file

@_cache_dec
async def _get_file(self, rpath, lpath, **kwargs):
    """
    Copy the remote ``rpath`` to the local ``lpath`` by delegating to the
    wrapped HTTP filesystem; extra keyword arguments are forwarded unchanged.
    """
    outcome = await self.httpFileSystem._get_file(rpath, lpath, **kwargs)
    return outcome
Expand Down

0 comments on commit c969f84

Please sign in to comment.