Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

user testing results #3

Open
m9brady opened this issue Dec 5, 2024 · 0 comments
Open

user testing results #3

m9brady opened this issue Dec 5, 2024 · 0 comments

Comments

@m9brady
Copy link

m9brady commented Dec 5, 2024

Sharing my not-very-polished test suite. Hope it is helpful! Feel free to close at any time.

General comments:

  • the DDS method of downloading is really fast! Much nicer experience than the order_id method.
  • the token auth method is good, I wonder if there'd be a better way than storing tokens locally though.
test status script name description
test_01.py same as rapi_dds_test given in repo but without click
test_02.py replication of a brief real use-case order (RCM GRD with Ice LUT for small temporal search window given AOI)
test_03.py similar to test_02 but with concurrent downloads over several threads
test_04.py similar to test_03 but with wider temporal search window. Issue documented in eodms-sgdot/py-eodms-rapi#50
test_01.py
from pathlib import Path
from netrc import netrc
from eodms_dds import dds
from eodms_rapi import EODMSRAPI
import os

def get_item(dds_api, collection, item_uuid, out_folder):
    """Look up one item through the DDS client and download it when it is ready.

    Calls ``refresh_aaa()`` on the client, prints the collection, the UUID and
    the item info returned by ``dds_api.get_item()``, then calls
    ``dds_api.download_item()`` with the absolute form of ``out_folder``.

    Returns the item info, or ``None`` when the client returned nothing or the
    info carries no ``'download_url'`` entry (no download is attempted then).
    """
    dds_api.refresh_aaa()

    print(f"collection: {collection}")
    print(f"item_uuid: {item_uuid}")

    info = dds_api.get_item(collection, item_uuid)
    print(f"Item info: {info}")

    # Only proceed when there is something to download.
    ready = info is not None and 'download_url' in info.keys()
    if not ready:
        return None

    target_dir = os.path.abspath(out_folder)
    dds_api.download_item(target_dir)
    return info

def extract_uuid(results):
    """Return the last path component of the record's ``'metadataFullName'``.

    ``results`` is a single search record (a mapping); the UUID is taken to be
    the basename of the path stored under ``'metadataFullName'``.
    """
    return os.path.basename(results.get('metadataFullName'))

def run(eodms_user, eodms_pwd, collection, env, out_folder, result_index=5):
    """Search a collection with RAPI and download one of the hits through DDS.

    Args:
        eodms_user: EODMS account name, passed to both API clients.
        eodms_pwd: EODMS password, passed to both API clients.
        collection: Collection identifier to search and download from.
        env: Environment argument forwarded to ``dds.DDS_API``.
        out_folder: Folder the item is downloaded into.
        result_index: Position of the search result to download. Defaults to
            5, the index this script has always used.

    Returns:
        Whatever ``get_item()`` returns for the selected result (the item
        info, or ``None`` when nothing could be downloaded).

    Raises:
        IndexError: If the search produced too few results for
            ``result_index``.
    """
    dds_api = dds.DDS_API(eodms_user, eodms_pwd, env)
    rapi = EODMSRAPI(eodms_user, eodms_pwd)

    filters = {
        'Beam Mode Type': ('LIKE', ['%50m%']),
        'Polarization': ('=', 'HH HV'),
        'Incidence Angle': ('>=', 17),
    }

    rapi.search(collection, filters)
    res = rapi.get_results('full')

    # Fail with an explanatory message instead of a bare "list index out of
    # range" when the search returns fewer hits than expected.
    n_results = len(res) if res else 0
    if not -n_results <= result_index < n_results:
        raise IndexError(
            f"search returned {n_results} result(s); "
            f"cannot select result_index={result_index}"
        )

    uuid = extract_uuid(res[result_index])
    return get_item(dds_api, collection, uuid, out_folder)

if __name__ == "__main__":
    # Credentials are read from the ~/.netrc entry for the EODMS host.
    netrc_hosts = netrc(Path('~/.netrc').expanduser()).hosts
    eodms_user, _, eodms_password = netrc_hosts['data.eodms-sgdot.nrcan-rncan.gc.ca']
    download_dir = os.path.expanduser("~/Downloads/eodms-beta-test")
    # use defaults from Kevin's test
    run(eodms_user, eodms_password, "RCMImageProducts", "prod", download_dir)
test_02.py
from netrc import netrc
from pathlib import Path
from time import sleep

from eodms_dds import dds
from eodms_rapi import EODMSRAPI

# Replicates a brief real use-case order: search RCM GRD products with the Ice
# LUT over a small AOI/time window via RAPI, then download every hit via DDS.
this_file = Path(__file__)
# credentials are read from the ~/.netrc entry for the EODMS host
eodms_user, _, eodms_pwd = netrc(
    Path('~/.netrc').expanduser()
).hosts['data.eodms-sgdot.nrcan-rncan.gc.ca']
rapi = EODMSRAPI(eodms_user, eodms_pwd)
# search options
### the nested tuples/lists/dicts is hard for me to understand but I get that they're necessary
### for multi-select filters. There HAS to be a more user-friendly way, like if a list of product types is
### provided, the search-api must be smart enough to use the right operator. Probably needs logic to account
### for range-type filters like incidence angle too
collection = "RCMImageProducts"
filters = {
    'Product Type': ('=', 'GRD'),
    'LUT Applied': ('=', 'Ice'),
}
features = [
    ('intersects', str(this_file.parent / 'assets' / 'lancaster_gate_30km_buffer_clip.geojson')),
]
dates = [
    {
        "start": "20241101_000000",
        "end": "20241102_000000"
    }
]
### the hit-count kwarg is nice to have for sanity-checks prior to "real" search queries!
rapi.search(collection=collection, filters=filters, features=features, dates=dates)
results = rapi.get_results(form='full')
### right here is where I as a user would want an easy way to either convert results dict
### to geodataframe or dump to geojson/shp/gpkg in order to narrow down the suitability of images
### since despite intersecting with the AOI it might be a tiny fraction. Using contains/within
### probably won't help either in the initial query.
# ddsapi needs the uuids which are stored in a couple of spots but this one seems easiest to manipulate
# (None records are skipped, as in test_03/test_04, so one bad record does not abort the run)
uuids = [r['metadataFullName'].split('/')[-1] for r in results if r is not None]
# download results
out_dir = Path('~/Downloads/eodms-beta-test').expanduser()
# parents=True so a missing ~/Downloads does not abort the run with FileNotFoundError
out_dir.mkdir(parents=True, exist_ok=True)
dds_api = dds.DDS_API(eodms_user, eodms_pwd, environment="prod") # testing in prod! right on!
for item_id in uuids:
    print(item_id)
    item_info = dds_api.get_item(collection=collection, item_uuid=item_id)
    # wait for the download_url to appear in dict keys
    ### this polling is better than polling for EODMS order fulfillment - would be nice to be able to queue up N granules (N decided by account type?)
    # get_item() may return None (test_01 guards against that too); stop polling in that case
    # instead of crashing on None
    while item_info is not None and 'download_url' not in item_info: # could also just check dds_api.img_info? why bother returning item_info then?
        sleep(10)
        item_info = dds_api.get_item(collection=collection, item_uuid=item_id)
    if item_info is None:
        print(f"skipping {item_id}: get_item() returned no item info")
        continue
    ### download_item() is curious because it doesn't take an item_id but get_item() does...
    ### I guess because the DDS_API class has an img_info attribute that stores the result of get_item()
    ### but then why does get_item() return the json too?
    dds_api.download_item(out_dir)
test_03.py
from netrc import netrc
from pathlib import Path
from time import sleep
from concurrent.futures import ThreadPoolExecutor

from eodms_dds import dds
from eodms_rapi import EODMSRAPI

# Shared setup for the concurrent download test: credentials, RAPI client,
# search parameters and the output folder used by order_and_download().
this_file = Path(__file__)
# credentials are read from the ~/.netrc entry for the EODMS host
eodms_user, _, eodms_pwd = netrc(
    Path('~/.netrc').expanduser()
).hosts['data.eodms-sgdot.nrcan-rncan.gc.ca']
rapi = EODMSRAPI(eodms_user, eodms_pwd)
# search options
### the nested tuples/lists/dicts is hard for me to understand but I get that they're necessary
### for multi-select filters. There HAS to be a more user-friendly way, like if a list of product types is
### provided, the search-api must be smart enough to use the right operator. Probably needs logic to account
### for range-type filters like incidence angle too
collection = "RCMImageProducts"
filters = {
    'Product Type': ('=', 'GRD'),
    'LUT Applied': ('=', 'Ice'),
}
features = [
    ('intersects', str(this_file.parent / 'assets' / 'lancaster_gate_30km_buffer_clip.geojson')),
]
dates = [
    {
        "start": "20241105_000000",
        "end": "20241106_000000"
    }
]

out_dir = Path('~/Downloads/eodms-beta-test').expanduser()
# parents=True so a missing ~/Downloads does not abort the run with FileNotFoundError
out_dir.mkdir(parents=True, exist_ok=True)

### quick-n-dirty function for concurrent use later
def order_and_download(api_obj, item_ids):
    """Request each item from DDS and download it once a download URL appears.

    Uses the module-level ``collection`` and ``out_dir``. For every id in
    ``item_ids`` the item is polled every 10 seconds until its info contains
    ``'download_url'``, then ``api_obj.download_item(out_dir)`` is called.

    Args:
        api_obj: DDS client exposing ``get_item()`` and ``download_item()``;
            each worker thread is given its own instance by the caller.
        item_ids: Iterable of item UUIDs to fetch.

    Returns:
        None.
    """
    for item in item_ids:
        item_info = api_obj.get_item(collection=collection, item_uuid=item)
        # get_item() may return None (test_01 guards against that too); stop
        # polling then instead of killing the worker with an AttributeError.
        while item_info is not None and 'download_url' not in item_info:
            sleep(10)
            item_info = api_obj.get_item(collection=collection, item_uuid=item)
        if item_info is None:
            print(f"skipping {item}: get_item() returned no item info")
            continue
        api_obj.download_item(out_dir)

### the hit-count is nice to have for sanity-checks prior to "real" search queries
rapi.search(collection=collection, filters=filters, features=features, dates=dates)
results = rapi.get_results(form='full')

### note how if the query params are adjusted (or even just the search is repeated with same params), the number
### of results just goes up (due to how rapi just appends results rather than replaces)

# ddsapi needs the uuids which are stored in a couple of spots but this one seems easiest to manipulate
### need to check for Nones because the results can contain None entries
unique_ids = {r['metadataFullName'].split('/')[-1] for r in results if r is not None}
uuids = list(unique_ids)
# download results
# really filthy concurrent method: one DDS client and one strided batch of uuids per worker
n_workers = 4
batches = [uuids[offset::n_workers] for offset in range(n_workers)]
apis = [dds.DDS_API(eodms_user, eodms_pwd, environment='prod') for _ in range(n_workers)]
with ThreadPoolExecutor(max_workers=n_workers) as executor:
    # collecting the results re-raises any exception from a worker
    results = list(executor.map(order_and_download, apis, batches))
test_04.py
from netrc import netrc
from pathlib import Path
from time import sleep
from concurrent.futures import ThreadPoolExecutor

from eodms_dds import dds
from eodms_rapi import EODMSRAPI

# Shared setup for the wide-window concurrent download test: credentials, RAPI
# client, search parameters and the output folder used by order_and_download().
this_file = Path(__file__)
# credentials are read from the ~/.netrc entry for the EODMS host
eodms_user, _, eodms_pwd = netrc(
    Path('~/.netrc').expanduser()
).hosts['data.eodms-sgdot.nrcan-rncan.gc.ca']
rapi = EODMSRAPI(eodms_user, eodms_pwd)
# search options
### the nested tuples/lists/dicts is hard for me to understand but I get that they're necessary
### for multi-select filters. There HAS to be a more user-friendly way, like if a list of product types is
### provided, the search-api must be smart enough to use the right operator. Probably needs logic to account
### for range-type filters like incidence angle too
collection = "RCMImageProducts"
# these filters are a common use-case for me
filters = {
    'Product Type': ('=', 'GRD'),
    'LUT Applied': ('=', 'Ice'),
}
# this geojson is provided too
features = [
    ('intersects', str(this_file.parent / 'assets' / 'lancaster_gate_30km_buffer_clip.geojson')),
]
# these dates produce results of just over 100 granules
dates = [
    {
        "start": "20241105_000000",
        "end": "20241118_000000"
    }
]

out_dir = Path('~/Downloads/eodms-beta-test').expanduser()
# parents=True so a missing ~/Downloads does not abort the run with FileNotFoundError
out_dir.mkdir(parents=True, exist_ok=True)

### quick-n-dirty download function for concurrent use later
def order_and_download(api_obj, item_ids):
    """Request each item from DDS and download it once a download URL appears.

    Uses the module-level ``collection`` and ``out_dir``. For every id in
    ``item_ids`` the item is polled every 10 seconds until its info contains
    ``'download_url'``, then ``api_obj.download_item(out_dir)`` is called.

    Args:
        api_obj: DDS client exposing ``get_item()`` and ``download_item()``;
            each worker thread is given its own instance by the caller.
        item_ids: Iterable of item UUIDs to fetch.

    Returns:
        None.
    """
    for item in item_ids:
        item_info = api_obj.get_item(collection=collection, item_uuid=item)
        # get_item() may return None (test_01 guards against that too); stop
        # polling then instead of killing the worker with an AttributeError.
        while item_info is not None and 'download_url' not in item_info:
            sleep(10)
            item_info = api_obj.get_item(collection=collection, item_uuid=item)
        if item_info is None:
            print(f"skipping {item}: get_item() returned no item info")
            continue
        api_obj.download_item(out_dir)

### the hit-count is nice to have for sanity-checks prior to "real" search queries
rapi.search(collection=collection, filters=filters, features=features, dates=dates)
# need to use full form to get uuids
results = rapi.get_results(form='full')

### note how if the query params are adjusted (or even just the search is repeated with same params), the number
### of results just goes up (due to how rapi appends results rather than replaces)
### https://github.com/eodms-sgdot/py-eodms-rapi/blob/20d249f5660398b7201ae8e9c73ee65b5714a676/eodms_rapi/eodms.py#L2751

### ddsapi needs the uuids which are stored in a couple of spots but this one seems easiest to manipulate
### need to check for Nones because rapi returns None for some reason?
unique_ids = {r['metadataFullName'].split('/')[-1] for r in results if r is not None}
uuids = list(unique_ids)
# download results
# really filthy concurrent method
n_workers = 8
# split uuids into roughly-equivalent batches (strided slices, one per worker)
batches = [uuids[offset::n_workers] for offset in range(n_workers)]
# create api object for each worker
apis = [dds.DDS_API(eodms_user, eodms_pwd, environment='prod') for _ in range(n_workers)]
with ThreadPoolExecutor(max_workers=n_workers) as executor:
    # collecting the results re-raises any exception from a worker
    results = list(executor.map(order_and_download, apis, batches))
package versions
dependencies:
  - asttokens=2.4.1
  - brotli-python=1.1.0
  - bzip2=1.0.8
  - ca-certificates=2024.8.30
  - certifi=2024.8.30
  - cffi=1.17.1
  - charset-normalizer=3.4.0
  - colorama=0.4.6
  - dateparser=1.2.0
  - decorator=5.1.1
  - exceptiongroup=1.2.2
  - executing=2.1.0
  - h2=4.1.0
  - hpack=4.0.0
  - hyperframe=6.0.1
  - idna=3.10
  - ipython=8.29.0
  - jedi=0.19.2
  - libexpat=2.6.4
  - libffi=3.4.2
  - libiconv=1.17
  - libmpdec=4.0.0
  - libsqlite=3.47.0
  - libxml2=2.13.5
  - libxslt=1.1.39
  - libzlib=1.3.1
  - lxml=5.3.0
  - matplotlib-inline=0.1.7
  - openssl=3.4.0
  - parso=0.8.4
  - pickleshare=0.7.5
  - pip=24.3.1
  - prompt-toolkit=3.0.48
  - pure_eval=0.2.3
  - pycparser=2.22
  - pygments=2.18.0
  - pysocks=1.7.1
  - python=3.13.0
  - python-dateutil=2.9.0.post0
  - python-tzdata=2024.2
  - python_abi=3.13
  - pytz=2024.2
  - regex=2024.11.6
  - requests=2.32.3
  - six=1.16.0
  - stack_data=0.6.2
  - tk=8.6.13
  - tqdm=4.67.1
  - traitlets=5.14.3
  - typing_extensions=4.12.2
  - tzdata=2024b
  - tzlocal=5.2
  - ucrt=10.0.22621.0
  - urllib3=2.2.3
  - vc=14.3
  - vc14_runtime=14.42.34433
  - vs2015_runtime=14.42.34433
  - wcwidth=0.2.13
  - win_inet_pton=1.1.0
  - xz=5.2.6
  - zstandard=0.23.0
  - zstd=1.5.6
  - pip:
      - click==8.1.7
      - geomet==1.1.0
      - py-eodms-dds==0.1.0
      - py-eodms-rapi==1.9.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant