Skip to content

Commit

Permalink
add fast_match argument in TargetTagalong class
Browse files Browse the repository at this point in the history
  • Loading branch information
araichoor committed Feb 14, 2024
1 parent 560b758 commit a14df0a
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 13 deletions.
6 changes: 5 additions & 1 deletion py/fiberassign/scripts/assign.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,10 @@ def parse_assign(optlist=None):
parser.add_argument("--write_fits_numproc", required=False, default=0,
type=int,
help="if >0, then runs the write_assignment_fits() in parallel with numproc jobs (default=0)")
parser.add_argument("--fast_match", required=False, default=False,
type=bool,
help="use a fast method to match TARGETID in the TargetTagalong class; "
"assumes there are no duplicate TARGETIDs in the input files (default=False)")


args = None
Expand Down Expand Up @@ -331,7 +335,7 @@ def run_assign_init(args, plate_radec=True):
# Create empty target list
tgs = Targets()
# Create structure for carrying along auxiliary target data not needed by C++.
tagalong = create_tagalong(plate_radec=plate_radec)
tagalong = create_tagalong(plate_radec=plate_radec, fast_match=args.fast_match)

# Append each input target file. These target files must all be of the
# same survey type, and will set the Targets object to be of that survey.
Expand Down
37 changes: 25 additions & 12 deletions py/fiberassign/targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from desitarget.sv3.sv3_targetmask import desi_mask as sv3_mask

from desitarget.targets import main_cmx_or_sv
from desitarget.geomask import match

from .utils import Logger, Timer
from .hardware import radec2xy, cs52xy
Expand All @@ -54,7 +55,7 @@ class TargetTagalong(object):
to propagate to the output fiberassign files, and that are not
needed by the C++ layer.
'''
def __init__(self, columns, outnames={}, aliases={}):
def __init__(self, columns, outnames={}, aliases={}, fast_match=False):
'''
Create a new tag-along object.
Expand All @@ -64,10 +65,14 @@ def __init__(self, columns, outnames={}, aliases={}):
the column will be given in the output file; None to omit
from the output file.
*aliases*: dict, string to string: for get_for_ids(), column aliases.
*fast_match*: bool (defaults to False): use a fast method to match TARGETIDs;
assumes there are no duplicate TARGETIDs.
[added in Feb. 2024]
'''
self.columns = columns
self.outnames = outnames
self.aliases = aliases
self.fast_match = fast_match
# Internally, we store one tuple for each targeting file read
# (to avoid manipulating/reformatting the arrays too much),
# where each tuple starts with the TARGETID of the targets, followed
Expand Down Expand Up @@ -129,16 +134,20 @@ def set_data(self, targetids, tabledata):
outarr[:] = defval
outarrs.append(outarr)
# Build output targetid-to-index map
outmap = dict([(tid,i) for i,tid in enumerate(targetids)])
if not self.fast_match:
outmap = dict([(tid,i) for i,tid in enumerate(targetids)])
# Go through my many data arrays
for thedata in self.data:
# TARGETIDs are the first element in the tuple
tids = thedata[0]
# Search for output array indices for these targetids
outinds = np.array([outmap.get(tid, -1) for tid in tids])
# Keep only the indices of targetids that were found
ininds = np.flatnonzero(outinds >= 0)
outinds = outinds[ininds]
if self.fast_match:
outinds, ininds = match(targetids, tids)
else:
outinds = np.array([outmap.get(tid, -1) for tid in tids])
# Keep only the indices of targetids that were found
ininds = np.flatnonzero(outinds >= 0)
outinds = outinds[ininds]
for outarr,inarr in zip(outarrs, thedata[1:]):
if outarr is None:
continue
Expand All @@ -160,19 +169,23 @@ def get_for_ids(self, targetids, names):
outarrs.append(np.zeros(len(targetids), dtype))
colinds.append(ic+1)
# Build output targetid-to-index map
outmap = dict([(tid,i) for i,tid in enumerate(targetids)])
if not self.fast_match:
outmap = dict([(tid,i) for i,tid in enumerate(targetids)])
# Go through my many data arrays
for thedata in self.data:
tids = thedata[0]
# Search for output array indices for these targetids
outinds = np.array([outmap.get(tid, -1) for tid in tids])
ininds = np.flatnonzero(outinds >= 0)
outinds = outinds[ininds]
if self.fast_match:
outinds, ininds = match(targetids, tids)
else:
outinds = np.array([outmap.get(tid, -1) for tid in tids])
ininds = np.flatnonzero(outinds >= 0)
outinds = outinds[ininds]
for outarr,ic in zip(outarrs, colinds):
outarr[outinds] = thedata[ic][ininds]
return outarrs

def create_tagalong(plate_radec=True):
def create_tagalong(plate_radec=True, fast_match=False):
cols = [
'TARGET_RA',
'TARGET_DEC',
Expand Down Expand Up @@ -200,7 +213,7 @@ def create_tagalong(plate_radec=True):

# (OBSCOND doesn't appear in all the fiberassign output HDUs,
# so we handle it specially)
return TargetTagalong(cols, outnames={'OBSCOND':None}, aliases=aliases)
return TargetTagalong(cols, outnames={'OBSCOND':None}, aliases=aliases, fast_match=fast_match)

def str_to_target_type(input):
if input == "science":
Expand Down

0 comments on commit a14df0a

Please sign in to comment.