Skip to content
This repository has been archived by the owner on Nov 19, 2024. It is now read-only.

Commit

Permalink
Merge pull request dandi#1415 from dandi/dandigh-1380
Browse files Browse the repository at this point in the history
Use `yarl` to clean up some code
  • Loading branch information
yarikoptic authored Mar 6, 2024
2 parents 4a1f7a7 + e58df62 commit ee18a15
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 22 deletions.
11 changes: 6 additions & 5 deletions dandi/dandiapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@
from time import sleep, time
from types import TracebackType
from typing import TYPE_CHECKING, Any, Dict, List, Optional
from urllib.parse import quote_plus, urlparse, urlunparse

import click
from dandischema import models
from pydantic import BaseModel, Field, PrivateAttr
import requests
import tenacity
from yarl import URL

from . import get_logger
from .consts import (
Expand Down Expand Up @@ -1521,7 +1521,7 @@ def get_content_url(
else:
raise # reraise since we need to figure out how to handle such a case
if strip_query:
url = urlunparse(urlparse(url)._replace(query=""))
url = str(URL(url).with_query(None))
return url

def get_download_file_iter(
Expand Down Expand Up @@ -1970,9 +1970,10 @@ def get_download_file_iter(
Returns a function that when called (optionally with an offset into the
file to start downloading at) returns a generator of chunks of the file
"""
prefix = quote_plus(str(self))
url = self.client.get_url(
f"/zarr/{self.zarr_id}/files?prefix={prefix}&download=true"
url = str(
URL(self.client.get_url(f"/zarr/{self.zarr_id}/files/")).with_query(
{"prefix": str(self), "download": "true"}
)
)

def downloader(start_at: int = 0) -> Iterator[bytes]:
Expand Down
8 changes: 6 additions & 2 deletions dandi/delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pathlib import Path

import click
from yarl import URL

from .consts import DRAFT, ZARR_EXTENSIONS, DandiInstance, dandiset_metadata_file
from .dandiapi import DandiAPIClient, RemoteAsset, RemoteDandiset
Expand Down Expand Up @@ -246,5 +247,8 @@ def find_local_asset(filepath: str) -> tuple[str, str]:


def is_same_url(url1: str, url2: str) -> bool:
    """Test whether *url1* and *url2* refer to the same resource.

    The comparison is done on parsed ``yarl.URL`` objects, so differences in
    scheme/host letter case and IDNA-encoded vs. Unicode international
    domain names compare equal.  Trailing slashes on the path are ignored,
    so ``…/api`` and ``…/api/`` are considered the same.

    :param url1: first URL to compare
    :param url2: second URL to compare
    :returns: ``True`` if both URLs normalize to the same value
    """
    # Fix: the previous string-based `url1.rstrip("/") == url2.rstrip("/")`
    # implementation was left in the body above this code, making the
    # yarl-based comparison unreachable dead code; it has been removed.
    #
    # NOTE: yarl's URL.with_path() drops any query string and fragment, so
    # those components are intentionally not part of the comparison.
    u1 = URL(url1)
    u1 = u1.with_path(u1.path.rstrip("/"))
    u2 = URL(url2)
    u2 = u2.with_path(u2.path.rstrip("/"))
    return u1 == u2
15 changes: 14 additions & 1 deletion dandi/tests/test_delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from .fixtures import DandiAPI, SampleDandiset
from ..consts import DRAFT, dandiset_metadata_file
from ..dandiapi import RESTFullAPIClient
from ..delete import delete
from ..delete import delete, is_same_url
from ..download import download
from ..exceptions import NotFoundError
from ..utils import list_paths
Expand Down Expand Up @@ -439,3 +439,16 @@ def test_delete_zarr_path(
assert list_paths(tmp_path) == [
tmp_path / zarr_dandiset.dandiset_id / "dandiset.yaml"
]


# Case table for test_is_same_url: (url1, url2, expected-result) triples
# covering trailing-slash, scheme-mismatch, case-insensitivity, and
# IDNA/Unicode domain equivalence.
_SAME_URL_CASES = [
    ("https://example.com/api", "https://example.com/api/", True),
    ("https://example.com/api", "http://example.com/api", False),
    ("https://example.com/api", "HTTPS://EXAMPLE.COM/api", True),
    ("https://example.珠宝/api", "https://example.xn--pbt977c/api", True),
]


@pytest.mark.parametrize("url1,url2,r", _SAME_URL_CASES)
def test_is_same_url(url1: str, url2: str, r: bool) -> None:
    # `is` (not `==`) also verifies that a genuine bool is returned.
    assert is_same_url(url1, url2) is r
32 changes: 18 additions & 14 deletions dandi/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,14 @@
import traceback
import types
from typing import IO, Any, List, Optional, Protocol, TypeVar, Union
from urllib.parse import parse_qs, urlparse, urlunparse

import dateutil.parser
from multidict import MultiDict # dependency of yarl
from pydantic import BaseModel, Field
import requests
import ruamel.yaml
from semantic_version import Version
from yarl import URL

from . import __version__, get_logger
from .consts import DandiInstance, known_instances, known_instances_rev
Expand Down Expand Up @@ -567,8 +568,9 @@ def get_instance(dandi_instance_id: str | DandiInstance) -> DandiInstance:
else:
instance = None
is_api = False
bits = urlparse(redirector_url)
redirector_url = urlunparse((bits[0], bits[1], "", "", "", ""))
redirector_url = str(
URL(redirector_url).with_path("").with_query(None).with_fragment(None)
)
else:
dandi_id = dandi_instance_id
instance = known_instances[dandi_id]
Expand Down Expand Up @@ -619,13 +621,13 @@ def _get_instance(
if dandi_id is None:
# Don't use pydantic.AnyHttpUrl, as that sets the `port` attribute even
# if it's not present in the string.
bits = urlparse(api_url)
if bits.hostname is not None:
dandi_id = bits.hostname
if bits.port is not None:
u = URL(api_url)
if u.host is not None:
dandi_id = u.host
if (port := u.explicit_port) is not None:
if ":" in dandi_id:
dandi_id = f"[{dandi_id}]"
dandi_id += f":{bits.port}"
dandi_id += f":{port}"
else:
dandi_id = api_url
return DandiInstance(
Expand Down Expand Up @@ -790,12 +792,14 @@ def is_page2_url(page1: str, page2: str) -> bool:
Tests whether the URL ``page2`` is the same as ``page1`` but with the
``page`` query parameter set to ``2``
"""
bits1 = urlparse(page1)
params1 = parse_qs(bits1.query)
params1["page"] = ["2"]
bits2 = urlparse(page2)
params2 = parse_qs(bits2.query)
return (bits1[:3], params1, bits1.fragment) == (bits2[:3], params2, bits2.fragment)
url1 = URL(page1)
params1 = MultiDict(url1.query)
params1["page"] = "2"
url1 = url1.with_query(None)
url2 = URL(page2)
params2 = url2.query
url2 = url2.with_query(None)
return (url1, sorted(params1.items())) == (url2, sorted(params2.items()))


def exclude_from_zarr(path: Path) -> bool:
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ install_requires =
ruamel.yaml >=0.15, <1
semantic-version
tenacity
yarl ~= 1.9
zarr ~= 2.10
zarr_checksum ~= 0.4.0
zip_safe = False
Expand Down

0 comments on commit ee18a15

Please sign in to comment.