From f666b782bdf98fcc4b229bdeab4e0243efbac36d Mon Sep 17 00:00:00 2001 From: tooyosi Date: Tue, 5 Nov 2024 15:42:58 +0000 Subject: [PATCH] add base_extractor to galaxyzoo module, use class label keys and prefixes for extractor tests, remove error on baseclass --- .../label_extractors/base_extractor.rb | 66 ----------- .../galaxy_zoo/base_extractor.rb | 65 +++++++++++ .../label_extractors/galaxy_zoo/decals.rb | 1 - .../shared/cosmic_dawn_and_euclid.rb | 3 +- lib/bajor/client.rb | 2 +- spec/modules/label_extractors/finder_spec.rb | 2 +- .../{ => galaxy_zoo}/base_extractor_spec.rb | 8 +- .../galaxy_zoo/cosmic_dawn_spec.rb | 103 +---------------- .../galaxy_zoo/decals_spec.rb | 78 +------------ .../galaxy_zoo/euclid_spec.rb | 104 +----------------- 10 files changed, 84 insertions(+), 348 deletions(-) delete mode 100644 app/modules/label_extractors/base_extractor.rb create mode 100644 app/modules/label_extractors/galaxy_zoo/base_extractor.rb rename spec/modules/label_extractors/{ => galaxy_zoo}/base_extractor_spec.rb (87%) diff --git a/app/modules/label_extractors/base_extractor.rb b/app/modules/label_extractors/base_extractor.rb deleted file mode 100644 index 5a84ff3..0000000 --- a/app/modules/label_extractors/base_extractor.rb +++ /dev/null @@ -1,66 +0,0 @@ -# frozen_string_literal: true - -module LabelExtractors - class BaseExtractor - - class UnknownTaskKey < StandardError; end - class UnknownLabelKey < StandardError; end - attr_reader :task_lookup_key, :task_prefix_label - - def initialize(task_lookup_key) - @task_lookup_key = task_lookup_key - @task_prefix_label = task_prefix - end - - # extract the keys from the reduction data payload hash - # and convert the keys to the workflow question tasks - # - # e.g. 
workflow type (GZ) are question type 'decision tree' tasks - # looking at the 'T0' task it correlates to 3 exclusive answers: - # 0 (smooth) - # 1 (features or disk) - # 2 (star or artifact) - # - # then combined with the label prefix used to identify the correlated task name for Zoobot - def extract(data_hash) - data_hash.transform_keys do |key| - # create the lable key used for column headers in the derived training catalogues - # note the hyphen and underscore formatting, see Zoobot label schema for more details - "#{task_prefix_label}-#{data_release_suffix}_#{data_payload_label(key)}" - end - end - - def self.label_prefixes - self::TASK_KEY_LABEL_PREFIXES - end - - def self.data_labels - self::TASK_KEY_DATA_LABELS - end - - # Base version of question_answers_schema method to be customized by subclasses - def self.question_answers_schema - raise NotImplementedError, "Subclass must define `question_answers_schema`" - end - - private - - def task_prefix - prefix = self.class::TASK_KEY_LABEL_PREFIXES[task_lookup_key] - raise UnknownTaskKey, "key not found: #{task_lookup_key}" unless prefix - - prefix - end - - def data_payload_label(key) - label = self.class::TASK_KEY_DATA_LABELS.dig(task_lookup_key, key) - raise UnknownLabelKey, "key not found: #{key}" unless label - - label - end - - def data_release_suffix - self.class::data_release_suffix - end - end -end diff --git a/app/modules/label_extractors/galaxy_zoo/base_extractor.rb b/app/modules/label_extractors/galaxy_zoo/base_extractor.rb new file mode 100644 index 0000000..73f98a2 --- /dev/null +++ b/app/modules/label_extractors/galaxy_zoo/base_extractor.rb @@ -0,0 +1,65 @@ +# frozen_string_literal: true + +module LabelExtractors + module GalaxyZoo + class BaseExtractor + attr_reader :task_lookup_key, :task_prefix_label + + def initialize(task_lookup_key) + @task_lookup_key = task_lookup_key + @task_prefix_label = task_prefix + end + + # extract the keys from the reduction data payload hash + # and convert the keys 
to the workflow question tasks + # + # e.g. workflow type (GZ) are question type 'decision tree' tasks + # looking at the 'T0' task it correlates to 3 exclusive answers: + # 0 (smooth) + # 1 (features or disk) + # 2 (star or artifact) + # + # then combined with the label prefix used to identify the correlated task name for Zoobot + def extract(data_hash) + data_hash.transform_keys do |key| + # create the label key used for column headers in the derived training catalogues + # note the hyphen and underscore formatting, see Zoobot label schema for more details + "#{task_prefix_label}-#{data_release_suffix}_#{data_payload_label(key)}" + end + end + + def self.label_prefixes + self::TASK_KEY_LABEL_PREFIXES + end + + def self.data_labels + self::TASK_KEY_DATA_LABELS + end + + # Base version of question_answers_schema method to be customized by subclasses + def self.question_answers_schema + raise NotImplementedError, "Subclass must define `question_answers_schema`" + end + + private + + def task_prefix + prefix = self.class::TASK_KEY_LABEL_PREFIXES[task_lookup_key] + raise UnknownTaskKey, "key not found: #{task_lookup_key}" unless prefix + + prefix + end + + def data_payload_label(key) + label = self.class::TASK_KEY_DATA_LABELS.dig(task_lookup_key, key) + raise UnknownLabelKey, "key not found: #{key}" unless label + + label + end + + def data_release_suffix + self.class::data_release_suffix + end + end + end +end diff --git a/app/modules/label_extractors/galaxy_zoo/decals.rb b/app/modules/label_extractors/galaxy_zoo/decals.rb index e2cede7..eb40867 100644 --- a/app/modules/label_extractors/galaxy_zoo/decals.rb +++ b/app/modules/label_extractors/galaxy_zoo/decals.rb @@ -1,5 +1,4 @@ # frozen_string_literal: true -require_relative '../base_extractor' module LabelExtractors module GalaxyZoo diff --git a/app/modules/label_extractors/shared/cosmic_dawn_and_euclid.rb b/app/modules/label_extractors/shared/cosmic_dawn_and_euclid.rb index 322dde7..3f00c65 100644 --- 
a/app/modules/label_extractors/shared/cosmic_dawn_and_euclid.rb +++ b/app/modules/label_extractors/shared/cosmic_dawn_and_euclid.rb @@ -1,9 +1,8 @@ # frozen_string_literal: true -require_relative '../base_extractor' module LabelExtractors module Shared - class CosmicDawnAndEuclid < BaseExtractor + class CosmicDawnAndEuclid < LabelExtractors::GalaxyZoo::BaseExtractor def self.data_release_suffix raise NotImplementedError, "Subclass must define `data_release_suffix`" diff --git a/lib/bajor/client.rb b/lib/bajor/client.rb index 899592e..20517d6 100644 --- a/lib/bajor/client.rb +++ b/lib/bajor/client.rb @@ -25,7 +25,7 @@ def create_training_job(manifest_path, workflow_name='cosmic_dawn') '/training/jobs/', # NOTE: Here we can augment the batch job run options via bajor # via the `run_opts: '--wandb --debug'` etc, these could be set via ENV - body: { manifest_path: manifest_path, opts: { 'run_opts': "--schema #{workflow_name}", 'workflow_name': workflow_name } }.to_json, + body: { manifest_path: manifest_path, opts: { 'run_opts': "--schema #{workflow_name.downcase}", 'workflow_name': workflow_name } }.to_json, headers: JSON_HEADERS ) diff --git a/spec/modules/label_extractors/finder_spec.rb b/spec/modules/label_extractors/finder_spec.rb index 8cf1ab2..3c5fc6c 100644 --- a/spec/modules/label_extractors/finder_spec.rb +++ b/spec/modules/label_extractors/finder_spec.rb @@ -25,7 +25,7 @@ it 'raises an error if the task key is not known for the label schema' do expect { described_class.extractor_instance('galaxy_zoo_cosmic_dawn_t50') - }.to raise_error(LabelExtractors::BaseExtractor::UnknownTaskKey, 'key not found: T50') + }.to raise_error(LabelExtractors::GalaxyZoo::UnknownTaskKey, 'key not found: T50') end it 'finds the decals mission data' do diff --git a/spec/modules/label_extractors/base_extractor_spec.rb b/spec/modules/label_extractors/galaxy_zoo/base_extractor_spec.rb similarity index 87% rename from spec/modules/label_extractors/base_extractor_spec.rb rename to 
spec/modules/label_extractors/galaxy_zoo/base_extractor_spec.rb index a9896e6..0765907 100644 --- a/spec/modules/label_extractors/base_extractor_spec.rb +++ b/spec/modules/label_extractors/galaxy_zoo/base_extractor_spec.rb @@ -1,6 +1,6 @@ # spec/models/label_extractors/base_extractor_spec.rb -class TestExtractor < LabelExtractors::BaseExtractor +class TestExtractor < LabelExtractors::GalaxyZoo::BaseExtractor TASK_KEY_LABEL_PREFIXES = { task_key_example: "example_prefix" } TASK_KEY_DATA_LABELS = { task_key_example: { "T0" => "example_label" } } def self.data_release_suffix @@ -8,7 +8,7 @@ def self.data_release_suffix end end -RSpec.describe LabelExtractors::BaseExtractor do +RSpec.describe LabelExtractors::GalaxyZoo::BaseExtractor do let(:task_lookup_key) { :task_key_example } let(:data_hash) { { "T0" => "example_key" } } let(:instance) { TestExtractor.new(task_lookup_key) } @@ -35,7 +35,7 @@ def self.data_release_suffix it 'raises an error for an unknown task_lookup_key' do invalid_instance = TestExtractor.allocate # Skips calling initialize allow(invalid_instance).to receive(:task_lookup_key).and_return(:invalid_key) - expect { invalid_instance.send(:task_prefix) }.to raise_error(LabelExtractors::BaseExtractor::UnknownTaskKey) + expect { invalid_instance.send(:task_prefix) }.to raise_error(LabelExtractors::GalaxyZoo::UnknownTaskKey) end end @@ -45,7 +45,7 @@ def self.data_release_suffix end it 'raises an error for an unknown key' do - expect { instance.send(:data_payload_label, "unknown_key") }.to raise_error(LabelExtractors::BaseExtractor::UnknownLabelKey) + expect { instance.send(:data_payload_label, "unknown_key") }.to raise_error(LabelExtractors::GalaxyZoo::UnknownLabelKey) end end end diff --git a/spec/modules/label_extractors/galaxy_zoo/cosmic_dawn_spec.rb b/spec/modules/label_extractors/galaxy_zoo/cosmic_dawn_spec.rb index 931ca34..461814c 100644 --- a/spec/modules/label_extractors/galaxy_zoo/cosmic_dawn_spec.rb +++ 
b/spec/modules/label_extractors/galaxy_zoo/cosmic_dawn_spec.rb @@ -3,103 +3,8 @@ require 'rails_helper' RSpec.describe LabelExtractors::GalaxyZoo::CosmicDawn do - let(:data_label_schema) do - { - 'T0' => { - '0' => 'smooth', - '1' => 'featured-or-disk', - '2' => 'problem' - }, - 'T1' => { - '0' => 'round', - '1' => 'in-between', - '2' => 'cigar-shaped' - }, - 'T2' => { - '0' => 'yes', - '1' => 'no' - }, - 'T3' => { - '0' => 'rounded', - '1' => 'boxy', - '2' => 'none' - }, - 'T4' => { - '0' => 'no', - '1' => 'weak', - '2' => 'strong' - }, - 'T5' => { - '0' => 'yes', - '1' => 'no' - }, - 'T6' => { - '0' => 'tight', - '1' => 'medium', - '2' => 'loose' - }, - 'T7' => { - '0' => '1', - '1' => '2', - '2' => '3', - '3' => '4', - '4' => 'more-than-4', - '5' => 'cant-tell' - }, - 'T8' => { - '0' => 'none', - '1' => 'small', - '2' => 'moderate', - '3' => 'large', - '4' => 'dominant' - }, - 'T11' => { - '0' => 'merger', - '1' => 'major-disturbance', - '2' => 'minor-disturbance', - '3' => 'none' - }, - 'T12' => { - '0' => 'yes', - '1' => 'no' - }, - 'T13' => { - '0' => 'yes', - '1' => 'no' - }, - 'T14' => { - '0' => 'star', - '1' => 'artifact', - '2' => 'zoom' - }, - 'T15' => { - '0' => 'saturation', - '1' => 'diffraction', - '2' => 'satellite', - '3' => 'ray', - '4' => 'scattered', - '5' => 'other' - } - } - end - let(:label_prefix_schema) do - { - 'T0' => 'smooth-or-featured', - 'T1' => 'how-rounded', - 'T2' => 'disk-edge-on', - 'T3' => 'edge-on-bulge', - 'T4' => 'bar', - 'T5' => 'has-spiral-arms', - 'T6' => 'spiral-winding', - 'T7' => 'spiral-arm-count', - 'T8' => 'bulge-size', - 'T11' => 'merging', # T10 is not used for training and no T9 :shrug: - 'T12' => 'lensing', - 'T13' => 'clumps', - 'T14' => 'problem', - 'T15' => 'artifact' - } - end + let(:data_label_schema) {LabelExtractors::GalaxyZoo::CosmicDawn::TASK_KEY_DATA_LABELS} + let(:label_prefix_schema) {LabelExtractors::GalaxyZoo::CosmicDawn::TASK_KEY_LABEL_PREFIXES} describe '#label_prefixes' do it 'has the correct 
schema label prefixes' do @@ -146,14 +51,14 @@ def expected_labels(label_prefix, task_lookup_key, payload) expect { # T0 has 3 choices (0, 1, 2) described_class.new('T0').extract(unknown_key_payload) - }.to raise_error(LabelExtractors::BaseExtractor::UnknownLabelKey, 'key not found: 3') + }.to raise_error(LabelExtractors::GalaxyZoo::UnknownLabelKey, 'key not found: 3') end it 'raises an error if the task key is not found in the known schema' do expect { # T16 is unknown in this schema described_class.new('T16').extract(data_payload) - }.to raise_error(LabelExtractors::BaseExtractor::UnknownTaskKey, 'key not found: T16') + }.to raise_error(LabelExtractors::GalaxyZoo::UnknownTaskKey, 'key not found: T16') end end end diff --git a/spec/modules/label_extractors/galaxy_zoo/decals_spec.rb b/spec/modules/label_extractors/galaxy_zoo/decals_spec.rb index 5a49ac7..46cc585 100644 --- a/spec/modules/label_extractors/galaxy_zoo/decals_spec.rb +++ b/spec/modules/label_extractors/galaxy_zoo/decals_spec.rb @@ -3,78 +3,8 @@ require 'rails_helper' RSpec.describe LabelExtractors::GalaxyZoo::Decals do - let(:data_label_schema) do - { - 'T0' => { - '0' => 'smooth', - '1' => 'featured-or-disk', - '2' => 'artifact' - }, - 'T1' => { - '0' => 'round', - '1' => 'in-between', - '2' => 'cigar-shaped' - }, - 'T2' => { - '0' => 'yes', - '1' => 'no' - }, - 'T3' => { - '0' => 'rounded', - '1' => 'boxy', - '2' => 'none' - }, - 'T4' => { - '0' => 'no', - '1' => 'weak', - '2' => 'strong' - }, - 'T5' => { - '0' => 'yes', - '1' => 'no' - }, - 'T6' => { - '0' => 'tight', - '1' => 'medium', - '2' => 'loose' - }, - 'T7' => { - '0' => '1', - '1' => '2', - '2' => '3', - '3' => '4', - '4' => 'more-than-4', - '5' => 'cant-tell' - }, - 'T8' => { - '0' => 'none', - '1' => 'small', - '2' => 'moderate', - '3' => 'large', - '4' => 'dominant' - }, - 'T11' => { - '0' => 'merger', - '1' => 'major-disturbance', - '2' => 'minor-disturbance', - '3' => 'none' - } - } - end - let(:label_prefix_schema) do - { - 'T0' => 
'smooth-or-featured', - 'T1' => 'how-rounded', - 'T2' => 'disk-edge-on', - 'T3' => 'edge-on-bulge', - 'T4' => 'bar', - 'T5' => 'has-spiral-arms', - 'T6' => 'spiral-winding', - 'T7' => 'spiral-arm-count', - 'T8' => 'bulge-size', - 'T11' => 'merging' # T10 is not used for training and no T9 :shrug: - } - end + let(:data_label_schema) {LabelExtractors::GalaxyZoo::Decals::TASK_KEY_DATA_LABELS} + let(:label_prefix_schema) {LabelExtractors::GalaxyZoo::Decals::TASK_KEY_LABEL_PREFIXES} describe '#label_prefixes' do it 'has the correct schema label prefixes' do @@ -134,14 +64,14 @@ def expected_labels(label_prefix, task_lookup_key, payload) expect { # T0 has 3 choices (0, 1, 2) described_class.new('T0').extract(unknown_key_payload) - }.to raise_error(LabelExtractors::BaseExtractor::UnknownLabelKey, 'key not found: 3') + }.to raise_error(LabelExtractors::GalaxyZoo::UnknownLabelKey, 'key not found: 3') end it 'raises an error if the task key is not found in the known schema' do expect { # T12 is unknonw in this schema described_class.new('T12').extract(data_payload) - }.to raise_error(LabelExtractors::BaseExtractor::UnknownTaskKey, 'key not found: T12') + }.to raise_error(LabelExtractors::GalaxyZoo::UnknownTaskKey, 'key not found: T12') end end end diff --git a/spec/modules/label_extractors/galaxy_zoo/euclid_spec.rb b/spec/modules/label_extractors/galaxy_zoo/euclid_spec.rb index abb222a..a0420bd 100644 --- a/spec/modules/label_extractors/galaxy_zoo/euclid_spec.rb +++ b/spec/modules/label_extractors/galaxy_zoo/euclid_spec.rb @@ -3,104 +3,8 @@ require 'rails_helper' RSpec.describe LabelExtractors::GalaxyZoo::Euclid do - let(:data_label_schema) do - { - 'T0' => { - '0' => 'smooth', - '1' => 'featured-or-disk', - '2' => 'problem' - }, - 'T1' => { - '0' => 'round', - '1' => 'in-between', - '2' => 'cigar-shaped' - }, - 'T2' => { - '0' => 'yes', - '1' => 'no' - }, - 'T3' => { - '0' => 'rounded', - '1' => 'boxy', - '2' => 'none' - }, - 'T4' => { - '0' => 'no', - '1' => 'weak', - '2' 
=> 'strong' - }, - 'T5' => { - '0' => 'yes', - '1' => 'no' - }, - 'T6' => { - '0' => 'tight', - '1' => 'medium', - '2' => 'loose' - }, - 'T7' => { - '0' => '1', - '1' => '2', - '2' => '3', - '3' => '4', - '4' => 'more-than-4', - '5' => 'cant-tell' - }, - 'T8' => { - '0' => 'none', - '1' => 'small', - '2' => 'moderate', - '3' => 'large', - '4' => 'dominant' - }, - 'T11' => { - '0' => 'merger', - '1' => 'major-disturbance', - '2' => 'minor-disturbance', - '3' => 'none' - }, - 'T12' => { - '0' => 'yes', - '1' => 'no' - }, - 'T13' => { - '0' => 'yes', - '1' => 'no' - }, - 'T14' => { - '0' => 'star', - '1' => 'artifact', - '2' => 'zoom' - }, - 'T15' => { - '0' => 'saturation', - '1' => 'diffraction', - '2' => 'satellite', - '3' => 'ray', - '4' => 'scattered', - '5' => 'other', - '6' => 'ghost' - } - } - end - let(:label_prefix_schema) do - { - 'T0' => 'smooth-or-featured', - 'T1' => 'how-rounded', - 'T2' => 'disk-edge-on', - 'T3' => 'edge-on-bulge', - 'T4' => 'bar', - 'T5' => 'has-spiral-arms', - 'T6' => 'spiral-winding', - 'T7' => 'spiral-arm-count', - 'T8' => 'bulge-size', - 'T11' => 'merging', # T10 is not used for training and no T9 :shrug: - 'T12' => 'lensing', - 'T13' => 'clumps', - 'T14' => 'problem', - 'T15' => 'artifact' - } - end + let(:data_label_schema) {LabelExtractors::GalaxyZoo::Euclid::TASK_KEY_DATA_LABELS} + let(:label_prefix_schema) {LabelExtractors::GalaxyZoo::Euclid::TASK_KEY_LABEL_PREFIXES} describe '#label_prefixes' do it 'has the correct schema label prefixes' do @@ -147,14 +51,14 @@ def expected_labels(label_prefix, task_lookup_key, payload) expect { # T0 has 3 choices (0, 1, 2) described_class.new('T0').extract(unknown_key_payload) - }.to raise_error(LabelExtractors::BaseExtractor::UnknownLabelKey, 'key not found: 3') + }.to raise_error(LabelExtractors::GalaxyZoo::UnknownLabelKey, 'key not found: 3') end it 'raises an error if the task key is not found in the known schema' do expect { # T16 is unknown in this schema 
described_class.new('T16').extract(data_payload) - }.to raise_error(LabelExtractors::BaseExtractor::UnknownTaskKey, 'key not found: T16') + }.to raise_error(LabelExtractors::GalaxyZoo::UnknownTaskKey, 'key not found: T16') end end end