-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactor codebase for new euclid workflow usage with tests
- Loading branch information
Showing
27 changed files
with
528 additions
and
120 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
# frozen_string_literal: true | ||
|
||
module LabelExtractors
  # Base class that converts the keys of a reduction data payload for a single
  # workflow task into the label keys used as column headers in the derived
  # training catalogues.
  #
  # Subclasses are expected to provide:
  #   * TASK_KEY_LABEL_PREFIXES - hash of task key => question label prefix
  #   * TASK_KEY_DATA_LABELS    - hash of task key => { payload key => answer label }
  #   * .data_release_suffix    - string identifying the data release
  #   * .question_answers_schema
  class BaseExtractor
    # raised when the task key has no entry in TASK_KEY_LABEL_PREFIXES
    class UnknownTaskKey < StandardError; end
    # raised when a payload key has no entry in TASK_KEY_DATA_LABELS for the task
    class UnknownLabelKey < StandardError; end

    attr_reader :task_lookup_key, :task_prefix_label

    # task_lookup_key - the workflow task key (e.g. 'T0') to extract labels for
    #
    # Raises UnknownTaskKey straight away when the key has no label prefix,
    # so a misconfigured extractor fails at construction time.
    def initialize(task_lookup_key)
      @task_lookup_key = task_lookup_key
      @task_prefix_label = task_prefix
    end

    # extract the keys from the reduction data payload hash
    # and convert the keys to the workflow question tasks
    #
    # e.g. workflow type (GZ) are question type 'decision tree' tasks
    # looking at the 'T0' task it correlates to 3 exclusive answers:
    #   0 (smooth)
    #   1 (features or disk)
    #   2 (star or artifact)
    #
    # then combined with the label prefix used to identify the correlated task name for Zoobot
    #
    # Returns a new hash with the same values and converted keys.
    # Raises UnknownLabelKey when a payload key is not known for this task.
    def extract(data_hash)
      data_hash.transform_keys do |key|
        # create the label key used for column headers in the derived training catalogues
        # note the hyphen and underscore formatting, see Zoobot label schema for more details
        "#{task_prefix_label}-#{data_release_suffix}_#{data_payload_label(key)}"
      end
    end

    # The task key => label prefix mapping defined by the concrete class.
    def self.label_prefixes
      self::TASK_KEY_LABEL_PREFIXES
    end

    # The task key => payload key => answer label mapping defined by the concrete class.
    def self.data_labels
      self::TASK_KEY_DATA_LABELS
    end

    # Base version of question_answers_schema method to be customized by subclasses
    def self.question_answers_schema
      raise NotImplementedError, 'Subclass must define `question_answers_schema`'
    end

    # Base version of the data release suffix hook used by #extract.
    # Declared here so a subclass that forgets to override it fails with a
    # descriptive error instead of a NoMethodError on the class.
    def self.data_release_suffix
      raise NotImplementedError, 'Subclass must define `data_release_suffix`'
    end

    private

    # Look up the label prefix for this extractor's task key.
    def task_prefix
      prefix = self.class::TASK_KEY_LABEL_PREFIXES[task_lookup_key]
      raise UnknownTaskKey, "key not found: #{task_lookup_key}" unless prefix

      prefix
    end

    # Look up the answer label for a payload key within this extractor's task.
    def data_payload_label(key)
      label = self.class::TASK_KEY_DATA_LABELS.dig(task_lookup_key, key)
      raise UnknownLabelKey, "key not found: #{key}" unless label

      label
    end

    # Delegates to the class level hook; the class method must be public
    # because it is invoked with an explicit receiver.
    def data_release_suffix
      self.class.data_release_suffix
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
# frozen_string_literal: true | ||
|
||
module LabelExtractors
  module GalaxyZoo
    # Label extractor configuration for the Galaxy Zoo Euclid workflow.
    #
    # Supplies the task key lookups and the data release suffix; the
    # task_lookup_key / task_prefix_label readers are inherited through the
    # superclass chain, so they are not redeclared here.
    class Euclid < LabelExtractors::Shared::CosmicDawnAndEuclid
      # Derived to conform to the existing catalogue schema for Zoobot euclid
      # https://github.com/mwalmsley/galaxy-datasets/blob/eed30d3e37b5559d0427c339e8dc1d2a9dc2d004/galaxy_datasets/shared/label_metadata.py#L462
      TASK_KEY_LABEL_PREFIXES = {
        'T0' => 'smooth-or-featured',
        'T1' => 'how-rounded',
        'T2' => 'disk-edge-on',
        'T3' => 'edge-on-bulge',
        'T4' => 'bar',
        'T5' => 'has-spiral-arms',
        'T6' => 'spiral-winding',
        'T7' => 'spiral-arm-count',
        'T8' => 'bulge-size',
        'T11' => 'merging', # T10 is not used for training and no T9 in prod :shrug:
        'T12' => 'lensing',
        'T13' => 'clumps',
        'T14' => 'problem',
        'T15' => 'artifact'
      }.freeze

      # Per task lookup of reduction payload key => answer label.
      # The inner hashes are frozen as well so the constant cannot be
      # mutated by accident at runtime.
      TASK_KEY_DATA_LABELS = {
        'T0' => {
          '0' => 'smooth',
          '1' => 'featured-or-disk',
          '2' => 'problem'
        },
        'T1' => {
          '0' => 'round',
          '1' => 'in-between',
          '2' => 'cigar-shaped'
        },
        'T2' => {
          '0' => 'yes',
          '1' => 'no'
        },
        'T3' => {
          '0' => 'rounded',
          '1' => 'boxy',
          '2' => 'none'
        },
        'T4' => {
          '0' => 'no',
          '1' => 'weak',
          '2' => 'strong'
        },
        'T5' => {
          '0' => 'yes',
          '1' => 'no'
        },
        'T6' => {
          '0' => 'tight',
          '1' => 'medium',
          '2' => 'loose'
        },
        'T7' => {
          '0' => '1',
          '1' => '2',
          '2' => '3',
          '3' => '4',
          '4' => 'more-than-4',
          '5' => 'cant-tell'
        },
        'T8' => {
          '0' => 'none',
          '1' => 'small',
          '2' => 'moderate',
          '3' => 'large',
          '4' => 'dominant'
        },
        'T11' => {
          '0' => 'merger',
          '1' => 'major-disturbance',
          '2' => 'minor-disturbance',
          '3' => 'none'
        },
        'T12' => {
          '0' => 'yes',
          '1' => 'no'
        },
        'T13' => {
          '0' => 'yes',
          '1' => 'no'
        },
        'T14' => {
          '0' => 'star',
          '1' => 'artifact',
          '2' => 'zoom'
        },
        'T15' => {
          '0' => 'saturation',
          '1' => 'diffraction',
          '2' => 'satellite',
          '3' => 'ray',
          '4' => 'scattered',
          '5' => 'other',
          '6' => 'ghost'
        }
      }.transform_values(&:freeze).freeze

      DATA_RELEASE_SUFFIX = 'euclid'

      # Suffix placed between the question prefix and the answer label.
      # Intentionally public: a bare `private` does not apply to `def self.`
      # methods, and this hook is called with an explicit class receiver.
      def self.data_release_suffix
        DATA_RELEASE_SUFFIX
      end
    end
  end
end
23 changes: 23 additions & 0 deletions
23
app/modules/label_extractors/shared/cosmic_dawn_and_euclid.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# frozen_string_literal: true | ||
require_relative '../base_extractor' | ||
|
||
module LabelExtractors
  module Shared
    # Shared behaviour for extractors whose label keys follow the
    # "<question_prefix>-<data_release_suffix>_<answer_suffix>" format.
    # Concrete subclasses supply the task key lookups and the suffix.
    class CosmicDawnAndEuclid < BaseExtractor
      # Hook for subclasses: the data release identifier used in label keys.
      def self.data_release_suffix
        raise NotImplementedError, 'Subclass must define `data_release_suffix`'
      end

      # provide a flat task question and answers list for the catalogues of
      # the subclass's data release
      #
      # Returns an array of label strings, one per answer of every task in
      # label_prefixes, in the order the lookups are defined.
      # Raises KeyError when a task key in label_prefixes has no data labels.
      def self.question_answers_schema
        label_prefixes.flat_map do |task_key, question_prefix|
          # fetch so a task missing from the data labels is reported by key
          data_labels.fetch(task_key).values.map do |answer_suffix|
            "#{question_prefix}-#{data_release_suffix}_#{answer_suffix}"
          end
        end
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.