Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-38850: Make trailedAssociatorTask #173

Merged
merged 3 commits into from
Sep 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions python/lsst/ap/association/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from .version import *
from .trailedSourceFilter import *
from .association import *
from .diaForcedSource import *
from .loadDiaCatalogs import *
Expand Down
59 changes: 45 additions & 14 deletions python/lsst/ap/association/association.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
from lsst.utils.timer import timeMethod
from .trailedSourceFilter import TrailedSourceFilterTask

# Enforce an error for unsafe column/array value setting in pandas.
pd.options.mode.chained_assignment = 'raise'
Expand All @@ -40,13 +41,27 @@
class AssociationConfig(pexConfig.Config):
"""Config class for AssociationTask.
"""

maxDistArcSeconds = pexConfig.Field(
dtype=float,
doc='Maximum distance in arcseconds to test for a DIASource to be a '
'match to a DIAObject.',
doc="Maximum distance in arcseconds to test for a DIASource to be a "
"match to a DIAObject.",
default=1.0,
)

trailedSourceFilter = pexConfig.ConfigurableField(
target=TrailedSourceFilterTask,
doc="Subtask to remove long trailed sources based on catalog source "
"morphological measurements.",
)

bsmartradio marked this conversation as resolved.
Show resolved Hide resolved
doTrailedSourceFilter = pexConfig.Field(
doc="Run traildeSourceFilter to remove long trailed sources from "
"output catalog.",
dtype=bool,
default=True,
)


class AssociationTask(pipeBase.Task):
"""Associate DIAOSources into existing DIAObjects.
Expand All @@ -60,10 +75,16 @@ class AssociationTask(pipeBase.Task):
ConfigClass = AssociationConfig
_DefaultName = "association"

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
if self.config.doTrailedSourceFilter:
self.makeSubtask("trailedSourceFilter")

@timeMethod
def run(self,
diaSources,
diaObjects):
diaObjects,
exposure_time=None):
"""Associate the new DiaSources with existing DiaObjects.

Parameters
Expand All @@ -72,22 +93,24 @@ def run(self,
New DIASources to be associated with existing DIAObjects.
diaObjects : `pandas.DataFrame`
Existing diaObjects from the Apdb.
exposure_time : `float`, optional
Exposure time from difference image.

Returns
-------
result : `lsst.pipe.base.Struct`
Results struct with components.

- ``"matchedDiaSources"`` : DiaSources that were matched. Matched
- ``matchedDiaSources`` : DiaSources that were matched. Matched
Sources have their diaObjectId updated and set to the id of the
diaObject they were matched to. (`pandas.DataFrame`)
- ``"unAssocDiaSources"`` : DiaSources that were not matched.
- ``unAssocDiaSources`` : DiaSources that were not matched.
Unassociated sources have their diaObject set to 0 as they
were not associated with any existing DiaObjects.
(`pandas.DataFrame`)
- ``"nUpdatedDiaObjects"`` : Number of DiaObjects that were
- ``nUpdatedDiaObjects`` : Number of DiaObjects that were
matched to new DiaSources. (`int`)
- ``"nUnassociatedDiaObjects"`` : Number of DiaObjects that were
- ``nUnassociatedDiaObjects`` : Number of DiaObjects that were
not matched a new DiaSource. (`int`)
"""
diaSources = self.check_dia_source_radec(diaSources)
Expand All @@ -98,7 +121,15 @@ def run(self,
nUpdatedDiaObjects=0,
nUnassociatedDiaObjects=0)

matchResult = self.associate_sources(diaObjects, diaSources)
if self.config.doTrailedSourceFilter:
diaTrailedResult = self.trailedSourceFilter.run(diaSources, exposure_time)
matchResult = self.associate_sources(diaObjects, diaTrailedResult.diaSources)

self.log.info("%i DIASources exceed max_trail_length, dropping "
"from source catalog." % len(diaTrailedResult.trailedDiaSources))

else:
matchResult = self.associate_sources(diaObjects, diaSources)

mask = matchResult.diaSources["diaObjectId"] != 0

Expand Down Expand Up @@ -157,11 +188,11 @@ def associate_sources(self, dia_objects, dia_sources):
result : `lsst.pipe.base.Struct`
Results struct with components.

- ``"diaSources"`` : Full set of diaSources both matched and not.
- ``diaSources`` : Full set of diaSources both matched and not.
(`pandas.DataFrame`)
- ``"nUpdatedDiaObjects"`` : Number of DiaObjects that were
- ``nUpdatedDiaObjects`` : Number of DiaObjects that were
associated. (`int`)
- ``"nUnassociatedDiaObjects"`` : Number of DiaObjects that were
- ``nUnassociatedDiaObjects`` : Number of DiaObjects that were
not matched a new DiaSource. (`int`)
"""
scores = self.score(
Expand Down Expand Up @@ -196,11 +227,11 @@ def score(self, dia_objects, dia_sources, max_dist):
result : `lsst.pipe.base.Struct`
Results struct with components:

- ``"scores"``: array of floats of match quality updated DIAObjects
- ``scores``: array of floats of match quality updated DIAObjects
(array-like of `float`).
- ``"obj_idxs"``: indexes of the matched DIAObjects in the catalog.
- ``obj_idxs``: indexes of the matched DIAObjects in the catalog.
(array-like of `int`)
- ``"obj_ids"``: array of floats of match quality updated DIAObjects
- ``obj_ids``: array of floats of match quality updated DIAObjects
(array-like of `int`).

Default values for these arrays are
Expand Down
12 changes: 6 additions & 6 deletions python/lsst/ap/association/diaPipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@
Currently loads directly from the Apdb rather than pre-loading.
"""

__all__ = ("DiaPipelineConfig",
"DiaPipelineTask",
"DiaPipelineConnections")
parejkoj marked this conversation as resolved.
Show resolved Hide resolved

import pandas as pd

import lsst.dax.apdb as daxApdb
Expand All @@ -44,10 +48,6 @@
PackageAlertsTask)
from lsst.ap.association.ssoAssociation import SolarSystemAssociationTask

__all__ = ("DiaPipelineConfig",
"DiaPipelineTask",
"DiaPipelineConnections")


class DiaPipelineConnections(
pipeBase.PipelineTaskConnections,
Expand Down Expand Up @@ -367,8 +367,8 @@ def run(self,
loaderResult = self.diaCatalogLoader.run(diffIm, self.apdb)

# Associate new DiaSources with existing DiaObjects.
assocResults = self.associator.run(diaSourceTable,
loaderResult.diaObjects)
assocResults = self.associator.run(diaSourceTable, loaderResult.diaObjects,
exposure_time=diffIm.visitInfo.exposureTime)
if self.config.doSolarSystemAssociation:
ssoAssocResult = self.solarSystemAssociator.run(
assocResults.unAssocDiaSources,
Expand Down
8 changes: 4 additions & 4 deletions python/lsst/ap/association/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,11 +156,11 @@ def makeMeasurement(self, values):
A `dict` representation of the metadata. Each `dict` has the
following keys:

``"updatedObjects"``
``updatedObjects``
The number of DIAObjects updated for this image (`int` or
`None`). May be `None` if the image was not
successfully associated.
``"unassociatedObjects"``
``unassociatedObjects``
The number of DIAObjects not associated with a DiaSource in
this image (`int` or `None`). May be `None` if the image was
not successfully associated.
Expand Down Expand Up @@ -216,7 +216,7 @@ def makeMeasurement(self, values):
A `dict` representation of the metadata. Each `dict` has the
following key:

``"numTotalSolarSystemObjects"``
``numTotalSolarSystemObjects``
The number of SolarSystemObjects within the observable detector
area (`int` or `None`). May be `None` if solar system
association was not attempted or the image was not
Expand Down Expand Up @@ -264,7 +264,7 @@ def makeMeasurement(self, values):
A `dict` representation of the metadata. Each `dict` has the
following key:

``"numAssociatedSsObjects"``
``numAssociatedSsObjects``
The number of successfully associated SolarSystem Objects
(`int` or `None`). May be `None` if solar system association
was not attempted or the image was not successfully associated.
Expand Down
36 changes: 18 additions & 18 deletions python/lsst/ap/association/skyBotEphemerisQuery.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,43 +126,43 @@ def run(self, visitInfos, visit):
details see
https://ssp.imcce.fr/webservices/skybot/api/conesearch/#output-results

``"Num"``
``Num``
object number (`int`, optional)
``"Name"``
``Name``
object name (`str`)
``"RA(h)"``
``RA(h)``
RA in HMS (`str`)
``"DE(deg)"``
``DE(deg)``
DEC in DMS (`str`)
``"Class"``
``Class``
Minor planet classification (`str`)
``"Mv"``
``Mv``
visual magnitude (`float`)
``"Err(arcsec)"``
``Err(arcsec)``
position error (`float`)
``"d(arcsec)"``
``d(arcsec)``
distance from exposure boresight (`float`)?
``"dRA(arcsec/h)"``
``dRA(arcsec/h)``
proper motion in RA (`float`)
``"dDEC(arcsec/h)"``
``dDEC(arcsec/h)``
proper motion in DEC (`float`)
``"Dg(ua)"``
``Dg(ua)``
geocentric distance (`float`)
``"Dh(ua)"``
``Dh(ua)``
heliocentric distance (`float`)
``"Phase(deg)"``
``Phase(deg)``
phase angle (`float`)
``"SunElong(deg)"``
``SunElong(deg)``
solar elongation (`float`)
``"ra"``
``ra``
RA in decimal degrees (`float`)
``"dec"``
``dec``
DEC in decimal degrees (`float`)
``"ssObjectId"``
``ssObjectId``
unique minor planet ID for internal use (`int`). Shared
across catalogs; the pair ``(ssObjectId, visitId)`` is
globally unique.
``"visitId"``
``visitId``
a copy of ``visit`` (`int`)
"""
# Grab the visitInfo from the raw to get the information needed on the
Expand Down
110 changes: 110 additions & 0 deletions python/lsst/ap/association/trailedSourceFilter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# This file is part of ap_association.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

__all__ = ("TrailedSourceFilterTask", "TrailedSourceFilterConfig")

import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
from lsst.utils.timer import timeMethod


class TrailedSourceFilterConfig(pexConfig.Config):
"""Config class for TrailedSourceFilterTask.
"""

max_trail_length = pexConfig.Field(
dtype=float,
doc="Length of long trailed sources to remove from the input catalog, "
"in arcseconds per second. Default comes from DMTN-199, which "
"requires removal of sources with trails longer than 10 "
"degrees/day, which is 36000/3600/24 arcsec/second, or roughly"
"0.416 arcseconds per second.",
default=36000/3600.0/24.0,
)


class TrailedSourceFilterTask(pipeBase.Task):
"""Find trailed sources in DIASources and filter them as per DMTN-199
guidelines.

This task checks the length of trailLength in the DIASource catalog using
a given arcsecond/second rate from max_trail_length and the exposure time.
The two values are used to calculate the maximum allowed trail length and
filters out any trail longer than the maximum. The max_trail_length is
outlined in DMTN-199 and determines the default value.
"""

ConfigClass = TrailedSourceFilterConfig
_DefaultName = "trailedSourceFilter"

@timeMethod
def run(self, dia_sources, exposure_time):
"""Remove trailed sources longer than ``config.max_trail_length`` from
the input catalog.

Parameters
----------
dia_sources : `pandas.DataFrame`
New DIASources to be checked for trailed sources.
exposure_time : `float`
Exposure time from difference image.

Returns
-------
result : `lsst.pipe.base.Struct`
Results struct with components.

- ``dia_sources`` : DIASource table that is free from unwanted
trailed sources. (`pandas.DataFrame`)
Comment on lines +75 to +76
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
- ``dia_sources`` : DIASource table that is free from unwanted
trailed sources. (`pandas.DataFrame`)
- ``dia_sources`` : DIASource table that has had long trailed sources removed. (`pandas.DataFrame`)


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no newline here

- ``trailed_dia_sources`` : DIASources that have trails which
exceed max_trail_length/second*exposure_time.
(`pandas.DataFrame`)
"""
trail_mask = self._check_dia_source_trail(dia_sources, exposure_time)

return pipeBase.Struct(
diaSources=dia_sources[~trail_mask].reset_index(drop=True),
trailedDiaSources=dia_sources[trail_mask].reset_index(drop=True))

def _check_dia_source_trail(self, dia_sources, exposure_time):
"""Find DiaSources that have long trails.

Return a mask of sources with lengths greater than
``config.max_trail_length`` multiplied by the exposure time.

Parameters
----------
dia_sources : `pandas.DataFrame`
Input DIASources to check for trail lengths.
exposure_time : `float`
Exposure time from difference image.

Returns
-------
trail_mask : `pandas.DataFrame`
Boolean mask for DIASources which are greater than the
cutoff length.
"""
trail_mask = (dia_sources.loc[:, "trailLength"].values[:]
>= (self.config.max_trail_length*exposure_time))

return trail_mask
Loading