Skip to content

Commit

Permalink
add base_extractor to galaxyzoo module, use class label keys and pref…
Browse files Browse the repository at this point in the history
…exises for extractor tests, remove error on baseclass
  • Loading branch information
Tooyosi committed Nov 5, 2024
1 parent 06cc71c commit f666b78
Show file tree
Hide file tree
Showing 10 changed files with 84 additions and 348 deletions.
66 changes: 0 additions & 66 deletions app/modules/label_extractors/base_extractor.rb

This file was deleted.

65 changes: 65 additions & 0 deletions app/modules/label_extractors/galaxy_zoo/base_extractor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# frozen_string_literal: true

module LabelExtractors
  module GalaxyZoo
    # Common behaviour shared by the Galaxy Zoo label extractors.
    #
    # A concrete subclass supplies the schema this class reads:
    #   * TASK_KEY_LABEL_PREFIXES - task lookup key => label prefix
    #   * TASK_KEY_DATA_LABELS    - task lookup key => { payload key => label }
    #   * .data_release_suffix    - class method whose value is embedded in every label
    class BaseExtractor
      attr_reader :task_lookup_key, :task_prefix_label

      class << self
        # @return the TASK_KEY_LABEL_PREFIXES constant resolved on the receiving class
        def label_prefixes
          self::TASK_KEY_LABEL_PREFIXES
        end

        # @return the TASK_KEY_DATA_LABELS constant resolved on the receiving class
        def data_labels
          self::TASK_KEY_DATA_LABELS
        end

        # Placeholder meant to be overridden by subclasses.
        #
        # @raise [NotImplementedError] always, in this base implementation
        def question_answers_schema
          raise NotImplementedError, "Subclass must define `question_answers_schema`"
        end
      end

      # Stores the task key and resolves its label prefix straight away,
      # so an unknown task key fails at construction time.
      #
      # @param task_lookup_key key into the subclass schema constants (e.g. 'T0')
      # @raise [UnknownTaskKey] when no prefix is registered for the key
      def initialize(task_lookup_key)
        @task_lookup_key = task_lookup_key
        @task_prefix_label = task_prefix
      end

      # Re-keys a reduction data payload so each key becomes a training
      # catalogue column header; the values are left untouched.
      #
      # Galaxy Zoo workflows are 'decision tree' question tasks, e.g. the 'T0'
      # task correlates to 3 exclusive answers:
      #   0 (smooth)
      #   1 (features or disk)
      #   2 (star or artifact)
      # Each answer label is combined with the task's label prefix, which
      # identifies the correlated task name for Zoobot.
      #
      # @param data_hash [Hash] payload keyed by answer key
      # @return [Hash] same values, keyed by "<prefix>-<suffix>_<label>"
      # @raise [UnknownLabelKey] when a payload key has no label for this task
      def extract(data_hash)
        data_hash.transform_keys do |payload_key|
          prefix = task_prefix_label
          suffix = data_release_suffix
          answer = data_payload_label(payload_key)

          # note the hyphen and underscore formatting,
          # see Zoobot label schema for more details
          "#{prefix}-#{suffix}_#{answer}"
        end
      end

      private

      # Label prefix registered for the current task key.
      def task_prefix
        self.class::TASK_KEY_LABEL_PREFIXES[task_lookup_key] ||
          raise(UnknownTaskKey, "key not found: #{task_lookup_key}")
      end

      # Label registered for one payload key under the current task key.
      def data_payload_label(key)
        self.class::TASK_KEY_DATA_LABELS.dig(task_lookup_key, key) ||
          raise(UnknownLabelKey, "key not found: #{key}")
      end

      # Delegates to the class-level suffix defined by the concrete extractor.
      def data_release_suffix
        self.class.data_release_suffix
      end
    end
  end
end
1 change: 0 additions & 1 deletion app/modules/label_extractors/galaxy_zoo/decals.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# frozen_string_literal: true
require_relative '../base_extractor'

module LabelExtractors
module GalaxyZoo
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
# frozen_string_literal: true
require_relative '../base_extractor'

module LabelExtractors
module Shared
class CosmicDawnAndEuclid < BaseExtractor
class CosmicDawnAndEuclid < LabelExtractors::GalaxyZoo::BaseExtractor

def self.data_release_suffix
raise NotImplementedError, "Subclass must define `data_release_suffix`"
Expand Down
2 changes: 1 addition & 1 deletion lib/bajor/client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def create_training_job(manifest_path, workflow_name='cosmic_dawn')
'/training/jobs/',
# NOTE: Here we can augment the batch job run options via bajor
# via the `run_opts: '--wandb --debug'` etc, these could be set via ENV
body: { manifest_path: manifest_path, opts: { 'run_opts': "--schema #{workflow_name}", 'workflow_name': workflow_name } }.to_json,
body: { manifest_path: manifest_path, opts: { 'run_opts': "--schema #{workflow_name.downcase}", 'workflow_name': workflow_name } }.to_json,
headers: JSON_HEADERS
)

Expand Down
2 changes: 1 addition & 1 deletion spec/modules/label_extractors/finder_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
it 'raises an error if the task key is not known for the label schema' do
expect {
described_class.extractor_instance('galaxy_zoo_cosmic_dawn_t50')
}.to raise_error(LabelExtractors::BaseExtractor::UnknownTaskKey, 'key not found: T50')
}.to raise_error(LabelExtractors::GalaxyZoo::UnknownTaskKey, 'key not found: T50')
end

it 'finds the decals mission data' do
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# spec/models/label_extractors/base_extractor_spec.rb

class TestExtractor < LabelExtractors::BaseExtractor
class TestExtractor < LabelExtractors::GalaxyZoo::BaseExtractor
TASK_KEY_LABEL_PREFIXES = { task_key_example: "example_prefix" }
TASK_KEY_DATA_LABELS = { task_key_example: { "T0" => "example_label" } }
def self.data_release_suffix
"v1"
end
end

RSpec.describe LabelExtractors::BaseExtractor do
RSpec.describe LabelExtractors::GalaxyZoo::BaseExtractor do
let(:task_lookup_key) { :task_key_example }
let(:data_hash) { { "T0" => "example_key" } }
let(:instance) { TestExtractor.new(task_lookup_key) }
Expand All @@ -35,7 +35,7 @@ def self.data_release_suffix
it 'raises an error for an unknown task_lookup_key' do
invalid_instance = TestExtractor.allocate # Skips calling initialize
allow(invalid_instance).to receive(:task_lookup_key).and_return(:invalid_key)
expect { invalid_instance.send(:task_prefix) }.to raise_error(LabelExtractors::BaseExtractor::UnknownTaskKey)
expect { invalid_instance.send(:task_prefix) }.to raise_error(LabelExtractors::GalaxyZoo::UnknownTaskKey)
end
end

Expand All @@ -45,7 +45,7 @@ def self.data_release_suffix
end

it 'raises an error for an unknown key' do
expect { instance.send(:data_payload_label, "unknown_key") }.to raise_error(LabelExtractors::BaseExtractor::UnknownLabelKey)
expect { instance.send(:data_payload_label, "unknown_key") }.to raise_error(LabelExtractors::GalaxyZoo::UnknownLabelKey)
end
end
end
103 changes: 4 additions & 99 deletions spec/modules/label_extractors/galaxy_zoo/cosmic_dawn_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,103 +3,8 @@
require 'rails_helper'

RSpec.describe LabelExtractors::GalaxyZoo::CosmicDawn do
let(:data_label_schema) do
{
'T0' => {
'0' => 'smooth',
'1' => 'featured-or-disk',
'2' => 'problem'
},
'T1' => {
'0' => 'round',
'1' => 'in-between',
'2' => 'cigar-shaped'
},
'T2' => {
'0' => 'yes',
'1' => 'no'
},
'T3' => {
'0' => 'rounded',
'1' => 'boxy',
'2' => 'none'
},
'T4' => {
'0' => 'no',
'1' => 'weak',
'2' => 'strong'
},
'T5' => {
'0' => 'yes',
'1' => 'no'
},
'T6' => {
'0' => 'tight',
'1' => 'medium',
'2' => 'loose'
},
'T7' => {
'0' => '1',
'1' => '2',
'2' => '3',
'3' => '4',
'4' => 'more-than-4',
'5' => 'cant-tell'
},
'T8' => {
'0' => 'none',
'1' => 'small',
'2' => 'moderate',
'3' => 'large',
'4' => 'dominant'
},
'T11' => {
'0' => 'merger',
'1' => 'major-disturbance',
'2' => 'minor-disturbance',
'3' => 'none'
},
'T12' => {
'0' => 'yes',
'1' => 'no'
},
'T13' => {
'0' => 'yes',
'1' => 'no'
},
'T14' => {
'0' => 'star',
'1' => 'artifact',
'2' => 'zoom'
},
'T15' => {
'0' => 'saturation',
'1' => 'diffraction',
'2' => 'satellite',
'3' => 'ray',
'4' => 'scattered',
'5' => 'other'
}
}
end
let(:label_prefix_schema) do
{
'T0' => 'smooth-or-featured',
'T1' => 'how-rounded',
'T2' => 'disk-edge-on',
'T3' => 'edge-on-bulge',
'T4' => 'bar',
'T5' => 'has-spiral-arms',
'T6' => 'spiral-winding',
'T7' => 'spiral-arm-count',
'T8' => 'bulge-size',
'T11' => 'merging', # T10 is not used for training and no T9 :shrug:
'T12' => 'lensing',
'T13' => 'clumps',
'T14' => 'problem',
'T15' => 'artifact'
}
end
let(:data_label_schema) {LabelExtractors::GalaxyZoo::CosmicDawn::TASK_KEY_DATA_LABELS}
let(:label_prefix_schema) {LabelExtractors::GalaxyZoo::CosmicDawn::TASK_KEY_LABEL_PREFIXES}

describe '#label_prefixes' do
it 'has the correct schema label prefixes' do
Expand Down Expand Up @@ -146,14 +51,14 @@ def expected_labels(label_prefix, task_lookup_key, payload)
expect {
# T0 has 3 choices (0, 1, 2)
described_class.new('T0').extract(unknown_key_payload)
}.to raise_error(LabelExtractors::BaseExtractor::UnknownLabelKey, 'key not found: 3')
}.to raise_error(LabelExtractors::GalaxyZoo::UnknownLabelKey, 'key not found: 3')
end

it 'raises an error if the task key is not found in the known schema' do
expect {
# T16 is unknown in this schema
described_class.new('T16').extract(data_payload)
}.to raise_error(LabelExtractors::BaseExtractor::UnknownTaskKey, 'key not found: T16')
}.to raise_error(LabelExtractors::GalaxyZoo::UnknownTaskKey, 'key not found: T16')
end
end
end
Loading

0 comments on commit f666b78

Please sign in to comment.