user testing results #3

m9brady · 2024-12-05T16:45:14Z

Sharing my not-very-polished test suite. Hope it is helpful! Feel free to close at any time.

General comments:

The DDS method of downloading is really fast! Much nicer experience than the order_id method.
The token auth method is good; I wonder if there'd be a better way than storing tokens locally, though.

test status	script name	description
✅	test_01.py	same as rapi_dds_test given in repo but without click
✅	test_02.py	replication of a brief real use-case order (RCM GRD with Ice LUT for small temporal search window given AOI)
✅	test_03.py	similar to test_02 but with concurrent downloads over several threads
❌	test_04.py	similar to test_03 but with wider temporal search window. Issue documented in eodms-sgdot/py-eodms-rapi#50

test_01.py

from pathlib import Path
from netrc import netrc
from eodms_dds import dds
from eodms_rapi import EODMSRAPI
import os

def get_item(dds_api, collection, item_uuid, out_folder):
    """Query one item through the DDS API and download it when a URL is present.

    ``dds_api.refresh_aaa()`` is called first, then the item is requested with
    ``dds_api.get_item(collection, item_uuid)``. The collection, the uuid and
    the raw item info are printed along the way.

    :param dds_api: object exposing ``refresh_aaa``, ``get_item`` and
        ``download_item``.
    :param collection: collection name passed through to ``dds_api.get_item``.
    :param item_uuid: uuid of the item to request.
    :param out_folder: download folder; made absolute before being handed to
        ``dds_api.download_item``.
    :return: the item info returned by ``dds_api.get_item``, or ``None`` when
        no info came back or it has no 'download_url' key (nothing is
        downloaded in that case).
    """
    dds_api.refresh_aaa()

    print(f"collection: {collection}")
    print(f"item_uuid: {item_uuid}")

    info = dds_api.get_item(collection, item_uuid)
    print(f"Item info: {info}")

    # Only download when a response exists and it carries a download URL.
    downloadable = info is not None and 'download_url' in info
    if not downloadable:
        return None

    target_dir = os.path.abspath(out_folder)
    dds_api.download_item(target_dir)
    return info

def extract_uuid(results):
    """Return the last path component of a record's 'metadataFullName' value.

    :param results: a single search-result mapping (despite the plural name);
        only its 'metadataFullName' entry is read.
    :return: the text after the final path separator of that entry.
    """
    return os.path.basename(results.get('metadataFullName'))

def run(eodms_user, eodms_pwd, collection, env, out_folder, result_index=5):
    """Search a collection with fixed filters and download one of the hits.

    A DDS API object and an EODMSRAPI object are created from the given
    credentials. The collection is searched with hard-coded filters (Beam Mode
    Type LIKE '%50m%', Polarization = 'HH HV', Incidence Angle >= 17), the
    'full' results are fetched, and the record at ``result_index`` is passed
    to ``get_item`` for download into ``out_folder``.

    :param eodms_user: EODMS user name.
    :param eodms_pwd: EODMS password.
    :param collection: collection name to search and download from.
    :param env: environment argument passed to ``dds.DDS_API``.
    :param out_folder: folder the item is downloaded into.
    :param result_index: position of the search result to download; defaults
        to 5, the index the original script used.
    :return: whatever ``get_item`` returns for the chosen record.
    :raises IndexError: when the search returned too few results for
        ``result_index``.
    """
    dds_api = dds.DDS_API(eodms_user, eodms_pwd, env)

    rapi = EODMSRAPI(eodms_user, eodms_pwd)

    filters = {
        'Beam Mode Type': ('LIKE', ['%50m%']),
        'Polarization': ('=', 'HH HV'),
        'Incidence Angle': ('>=', 17),
    }

    rapi.search(collection, filters)

    res = rapi.get_results('full')

    # A short result list used to surface as a bare "list index out of range";
    # keep the exception type but say what actually went wrong.
    try:
        record = res[result_index]
    except IndexError as err:
        raise IndexError(
            f"search returned {len(res)} result(s); "
            f"cannot select result index {result_index}"
        ) from err

    uuid = extract_uuid(record)

    return get_item(dds_api, collection, uuid, out_folder)

if __name__ == "__main__":
    # Pull the login and password for the EODMS host out of ~/.netrc; the
    # middle field of the entry is not used here.
    netrc_hosts = netrc(Path('~/.netrc').expanduser()).hosts
    eodms_user, _, eodms_password = netrc_hosts['data.eodms-sgdot.nrcan-rncan.gc.ca']

    # use defaults from Kevin's test
    run_kwargs = {
        "eodms_user": eodms_user,
        "eodms_pwd": eodms_password,
        "collection": "RCMImageProducts",
        "env": "prod",
        "out_folder": os.path.expanduser("~/Downloads/eodms-beta-test"),
    }
    run(**run_kwargs)

test_02.py

from netrc import netrc
from pathlib import Path
from time import sleep

from eodms_dds import dds
from eodms_rapi import EODMSRAPI

this_file = Path(__file__)
# credentials come from the ~/.netrc entry for the EODMS host; the middle field is unused
eodms_user, _, eodms_pwd = netrc(
    Path('~/.netrc').expanduser()
).hosts['data.eodms-sgdot.nrcan-rncan.gc.ca']
rapi = EODMSRAPI(eodms_user, eodms_pwd)
# search options
### the nested tuples/lists/dicts is hard for me to understand but I get that they're necessary
### for multi-select filters. There HAS to be a more user-friendly way, like if a list of product types is
### provided, the search-api must be smart enough to use the right operator. Probably needs logic to account
### for range-type filters like incidence angle too
collection = "RCMImageProducts"
filters = {
    'Product Type': ('=', 'GRD'),
    'LUT Applied': ('=', 'Ice'),
}
features = [
    ('intersects', str(this_file.parent / 'assets' / 'lancaster_gate_30km_buffer_clip.geojson')),
]
dates = [
    {
        "start": "20241101_000000",
        "end": "20241102_000000"
    }
]
### the hit-count kwarg is nice to have for sanity-checks prior to "real" search queries!
rapi.search(collection=collection, filters=filters, features=features, dates=dates)
results = rapi.get_results(form='full')
### right here is where I as a user would want an easy way to either convert results dict
### to geodataframe or dump to geojson/shp/gpkg in order to narrow down the suitability of images
### since despite intersecting with the AOI it might be a tiny fraction. Using contains/within
### probably won't help either in the initial query.
# ddsapi needs the uuids which are stored in a couple of spots but this one seems easiest to manipulate
# None entries are skipped so a missing record cannot break the uuid extraction
uuids = [r['metadataFullName'].split('/')[-1] for r in results if r is not None]
# download results
out_dir = Path('~/Downloads/eodms-beta-test').expanduser()
# parents=True so this also works when ~/Downloads does not exist yet
out_dir.mkdir(parents=True, exist_ok=True)
dds_api = dds.DDS_API(eodms_user, eodms_pwd, environment="prod") # testing in prod! right on!
for item_id in uuids:
    print(item_id)
    item_info = dds_api.get_item(collection=collection, item_uuid=item_id)
    # wait for the download_url to appear in dict keys
    ### this polling is better than polling for EODMS order fulfillment - would be nice to be able to queue up N granules (N decided by account type?)
    # keep polling only while there is a response that still lacks the url
    while item_info is not None and 'download_url' not in item_info: # could also just check dds_api.img_info? why bother returning item_info then?
        sleep(10)
        item_info = dds_api.get_item(collection=collection, item_uuid=item_id)
    if item_info is None:
        # no item info came back: report it and carry on with the remaining items
        print(f"skipping {item_id}: no item info returned")
        continue
    ### download_item() is curious because it doesn't take an item_id but get_item() does...
    ### I guess because the DDS_API class has an img_info attribute that stores the result of get_item()
    ### but then why does get_item() return the json too?
    dds_api.download_item(out_dir)

test_03.py

from netrc import netrc
from pathlib import Path
from time import sleep
from concurrent.futures import ThreadPoolExecutor

from eodms_dds import dds
from eodms_rapi import EODMSRAPI

this_file = Path(__file__)
# credentials come from the ~/.netrc entry for the EODMS host; the middle field is unused
eodms_user, _, eodms_pwd = netrc(
    Path('~/.netrc').expanduser()
).hosts['data.eodms-sgdot.nrcan-rncan.gc.ca']
rapi = EODMSRAPI(eodms_user, eodms_pwd)
# search options
### the nested tuples/lists/dicts is hard for me to understand but I get that they're necessary
### for multi-select filters. There HAS to be a more user-friendly way, like if a list of product types is
### provided, the search-api must be smart enough to use the right operator. Probably needs logic to account
### for range-type filters like incidence angle too
collection = "RCMImageProducts"
filters = {
    'Product Type': ('=', 'GRD'),
    'LUT Applied': ('=', 'Ice'),
}
features = [
    ('intersects', str(this_file.parent / 'assets' / 'lancaster_gate_30km_buffer_clip.geojson')),
]
dates = [
    {
        "start": "20241105_000000",
        "end": "20241106_000000"
    }
]

out_dir = Path('~/Downloads/eodms-beta-test').expanduser()
# parents=True so this also works when ~/Downloads does not exist yet
out_dir.mkdir(parents=True, exist_ok=True)

### quick-n-dirty function for concurrent use later
def order_and_download(api_obj, item_ids):
    """Request each item through one DDS API object and download it once ready.

    Items are handled one after another. ``api_obj.get_item`` is called for an
    item, then again every 10 seconds, until the returned info contains a
    'download_url' key; ``api_obj.download_item`` then saves it into the
    module-level ``out_dir``. The module-level ``collection`` is the
    collection queried.

    If ``get_item`` returns ``None`` the item is reported and skipped, so one
    bad item does not abort the rest of the batch.

    :param api_obj: object exposing ``get_item`` and ``download_item``.
    :param item_ids: iterable of item uuids to fetch.
    :return: None
    """
    for item in item_ids:
        item_info = api_obj.get_item(collection=collection, item_uuid=item)
        # keep polling only while there is a response that still lacks the url
        while item_info is not None and 'download_url' not in item_info:
            sleep(10)
            item_info = api_obj.get_item(collection=collection, item_uuid=item)
        if item_info is None:
            # calling .keys() on None used to kill the whole worker here
            print(f"skipping {item}: no item info returned")
            continue
        api_obj.download_item(out_dir)

### the hit-count is nice to have for sanity-checks prior to "real" search queries
search_kwargs = dict(collection=collection, filters=filters, features=features, dates=dates)
rapi.search(**search_kwargs)
results = rapi.get_results(form='full')

### note how if the query params are adjusted (or even just the search is repeated with same params), the number
### of results just goes up (due to how rapi just appends results rather than replaces)

# ddsapi needs the uuids which are stored in a couple of spots but this one seems easiest to manipulate
### need to check for Nones because rapi will just return None in a lot of cases?
# a set comprehension drops duplicate uuids before turning them back into a list
uuids = list({
    record['metadataFullName'].split('/')[-1]
    for record in results
    if record is not None
})
# download results
# really filthy concurrent method
n_workers = 4
# deal the uuids round-robin into one batch per worker
batches = [uuids[offset::n_workers] for offset in range(n_workers)]
# one DDS API object per worker
apis = [
    dds.DDS_API(eodms_user, eodms_pwd, environment='prod')
    for _ in range(n_workers)
]
with ThreadPoolExecutor(max_workers=n_workers) as executor:
    futures = [
        executor.submit(order_and_download, worker_api, worker_batch)
        for worker_api, worker_batch in zip(apis, batches)
    ]
    # .result() blocks until each worker finishes and re-raises its exception, if any
    results = [done.result() for done in futures]

test_04.py

from netrc import netrc
from pathlib import Path
from time import sleep
from concurrent.futures import ThreadPoolExecutor

from eodms_dds import dds
from eodms_rapi import EODMSRAPI

this_file = Path(__file__)
# credentials come from the ~/.netrc entry for the EODMS host; the middle field is unused
eodms_user, _, eodms_pwd = netrc(
    Path('~/.netrc').expanduser()
).hosts['data.eodms-sgdot.nrcan-rncan.gc.ca']
rapi = EODMSRAPI(eodms_user, eodms_pwd)
# search options
### the nested tuples/lists/dicts is hard for me to understand but I get that they're necessary
### for multi-select filters. There HAS to be a more user-friendly way, like if a list of product types is
### provided, the search-api must be smart enough to use the right operator. Probably needs logic to account
### for range-type filters like incidence angle too
collection = "RCMImageProducts"
# these filters are a common use-case for me
filters = {
    'Product Type': ('=', 'GRD'),
    'LUT Applied': ('=', 'Ice'),
}
# this geojson is provided too
features = [
    ('intersects', str(this_file.parent / 'assets' / 'lancaster_gate_30km_buffer_clip.geojson')),
]
# these dates produce results of just over 100 granules
dates = [
    {
        "start": "20241105_000000",
        "end": "20241118_000000"
    }
]

out_dir = Path('~/Downloads/eodms-beta-test').expanduser()
# parents=True so this also works when ~/Downloads does not exist yet
out_dir.mkdir(parents=True, exist_ok=True)

### quick-n-dirty download function for concurrent use later
def order_and_download(api_obj, item_ids):
    """Request each item through one DDS API object and download it once ready.

    Items are handled one after another. ``api_obj.get_item`` is called for an
    item, then again every 10 seconds, until the returned info contains a
    'download_url' key; ``api_obj.download_item`` then saves it into the
    module-level ``out_dir``. The module-level ``collection`` is the
    collection queried.

    If ``get_item`` returns ``None`` the item is reported and skipped, so one
    bad item does not abort the rest of the batch.

    :param api_obj: object exposing ``get_item`` and ``download_item``.
    :param item_ids: iterable of item uuids to fetch.
    :return: None
    """
    for item in item_ids:
        item_info = api_obj.get_item(collection=collection, item_uuid=item)
        # keep polling only while there is a response that still lacks the url
        while item_info is not None and 'download_url' not in item_info:
            sleep(10)
            item_info = api_obj.get_item(collection=collection, item_uuid=item)
        if item_info is None:
            # calling .keys() on None used to kill the whole worker here
            print(f"skipping {item}: no item info returned")
            continue
        api_obj.download_item(out_dir)

### the hit-count is nice to have for sanity-checks prior to "real" search queries
search_kwargs = dict(collection=collection, filters=filters, features=features, dates=dates)
rapi.search(**search_kwargs)
results = rapi.get_results(form='full') # need to use full form to get uuids

### note how if the query params are adjusted (or even just the search is repeated with same params), the number
### of results just goes up (due to how rapi appends results rather than replaces)
### https://github.com/eodms-sgdot/py-eodms-rapi/blob/20d249f5660398b7201ae8e9c73ee65b5714a676/eodms_rapi/eodms.py#L2751

### ddsapi needs the uuids which are stored in a couple of spots but this one seems easiest to manipulate
### need to check for Nones because rapi returns None for some reason?
# a set comprehension drops duplicate uuids before turning them back into a list
uuids = list({
    record['metadataFullName'].split('/')[-1]
    for record in results
    if record is not None
})
# download results
# really filthy concurrent method
n_workers = 8
# split uuids into roughly-equivalent batches
batches = [uuids[offset::n_workers] for offset in range(n_workers)]
# create api object for each worker
apis = [
    dds.DDS_API(eodms_user, eodms_pwd, environment='prod')
    for _ in range(n_workers)
]
with ThreadPoolExecutor(max_workers=n_workers) as executor:
    futures = [
        executor.submit(order_and_download, worker_api, worker_batch)
        for worker_api, worker_batch in zip(apis, batches)
    ]
    # .result() blocks until each worker finishes and re-raises its exception, if any
    results = [done.result() for done in futures]

package versions

dependencies:
  - asttokens=2.4.1
  - brotli-python=1.1.0
  - bzip2=1.0.8
  - ca-certificates=2024.8.30
  - certifi=2024.8.30
  - cffi=1.17.1
  - charset-normalizer=3.4.0
  - colorama=0.4.6
  - dateparser=1.2.0
  - decorator=5.1.1
  - exceptiongroup=1.2.2
  - executing=2.1.0
  - h2=4.1.0
  - hpack=4.0.0
  - hyperframe=6.0.1
  - idna=3.10
  - ipython=8.29.0
  - jedi=0.19.2
  - libexpat=2.6.4
  - libffi=3.4.2
  - libiconv=1.17
  - libmpdec=4.0.0
  - libsqlite=3.47.0
  - libxml2=2.13.5
  - libxslt=1.1.39
  - libzlib=1.3.1
  - lxml=5.3.0
  - matplotlib-inline=0.1.7
  - openssl=3.4.0
  - parso=0.8.4
  - pickleshare=0.7.5
  - pip=24.3.1
  - prompt-toolkit=3.0.48
  - pure_eval=0.2.3
  - pycparser=2.22
  - pygments=2.18.0
  - pysocks=1.7.1
  - python=3.13.0
  - python-dateutil=2.9.0.post0
  - python-tzdata=2024.2
  - python_abi=3.13
  - pytz=2024.2
  - regex=2024.11.6
  - requests=2.32.3
  - six=1.16.0
  - stack_data=0.6.2
  - tk=8.6.13
  - tqdm=4.67.1
  - traitlets=5.14.3
  - typing_extensions=4.12.2
  - tzdata=2024b
  - tzlocal=5.2
  - ucrt=10.0.22621.0
  - urllib3=2.2.3
  - vc=14.3
  - vc14_runtime=14.42.34433
  - vs2015_runtime=14.42.34433
  - wcwidth=0.2.13
  - win_inet_pton=1.1.0
  - xz=5.2.6
  - zstandard=0.23.0
  - zstd=1.5.6
  - pip:
      - click==8.1.7
      - geomet==1.1.0
      - py-eodms-dds==0.1.0
      - py-eodms-rapi==1.9.0

The text was updated successfully, but these errors were encountered:

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

user testing results #3

user testing results #3

m9brady commented Dec 5, 2024

user testing results #3

user testing results #3

Comments

m9brady commented Dec 5, 2024