From 7bc4bd859b74e2f2726dc4426f4fcce9efe20c48 Mon Sep 17 00:00:00 2001 From: Christian Sutter Date: Fri, 3 Nov 2023 12:10:28 +0000 Subject: [PATCH] Symbolize keys for `PublishingApiDocument` This lets us write somewhat neater, Rubyish code and rely on keys being symbols everywhere. --- .../publishing_api_message_processor.rb | 3 +- app/models/publishing_api_document.rb | 6 +- app/models/publishing_api_document/base.rb | 4 +- .../content_with_multiple_types.rb | 2 +- app/models/publishing_api_document/publish.rb | 48 ++--- .../metadata_schema_compliance_spec.rb | 2 +- .../publishing_api_message_processor_spec.rb | 6 +- .../publishing_api_document/ignore_spec.rb | 6 +- .../publishing_api_document/publish_spec.rb | 204 +++++++++--------- .../publishing_api_document/unpublish_spec.rb | 6 +- spec/models/publishing_api_document_spec.rb | 6 +- 11 files changed, 147 insertions(+), 146 deletions(-) diff --git a/app/message_processors/publishing_api_message_processor.rb b/app/message_processors/publishing_api_message_processor.rb index d8aaef8..619a5cf 100644 --- a/app/message_processors/publishing_api_message_processor.rb +++ b/app/message_processors/publishing_api_message_processor.rb @@ -1,7 +1,8 @@ class PublishingApiMessageProcessor # Implements the callback interface required by `govuk_message_queue_consumer` def process(message) - document = PublishingApiDocument.for(message.payload) + document_hash = message.payload.deep_symbolize_keys + document = PublishingApiDocument.for(document_hash) document.synchronize message.ack diff --git a/app/models/publishing_api_document.rb b/app/models/publishing_api_document.rb index c027c17..74bfa17 100644 --- a/app/models/publishing_api_document.rb +++ b/app/models/publishing_api_document.rb @@ -7,15 +7,15 @@ module PublishingApiDocument # Factory method returning a Document instance of an appropriate concrete type for the given # document hash. def self.for(document_hash) - case document_hash["document_type"] + case document_hash[:document_type] when *UNPUBLISH_DOCUMENT_TYPES Unpublish.new(document_hash) when *Rails.configuration.document_type_ignorelist - return Publish.new(document_hash) if force_add_path?(document_hash["base_path"]) + return Publish.new(document_hash) if force_add_path?(document_hash[:base_path]) Ignore.new(document_hash) else - return Ignore.new(document_hash) unless document_hash["locale"].in?(["en", nil]) + return Ignore.new(document_hash) unless document_hash[:locale].in?(["en", nil]) Publish.new(document_hash) end diff --git a/app/models/publishing_api_document/base.rb b/app/models/publishing_api_document/base.rb index 3b11d06..f8349a6 100644 --- a/app/models/publishing_api_document/base.rb +++ b/app/models/publishing_api_document/base.rb @@ -14,12 +14,12 @@ def synchronize(service: nil) # The content ID of the document. def content_id - document_hash.fetch("content_id") + document_hash.fetch(:content_id) end # The payload version of the document. def payload_version - document_hash.fetch("payload_version").to_i + document_hash.fetch(:payload_version).to_i end private diff --git a/app/models/publishing_api_document/content_with_multiple_types.rb b/app/models/publishing_api_document/content_with_multiple_types.rb index 333e107..78361c2 100644 --- a/app/models/publishing_api_document/content_with_multiple_types.rb +++ b/app/models/publishing_api_document/content_with_multiple_types.rb @@ -5,7 +5,7 @@ def initialize(content_with_multiple_types) end def html_content - @content_with_multiple_types.find { _1["content_type"] == "text/html" }&.dig("content") + @content_with_multiple_types.find { _1[:content_type] == "text/html" }&.dig(:content) end def text_content diff --git a/app/models/publishing_api_document/publish.rb b/app/models/publishing_api_document/publish.rb index 85c8355..8a91183 100644 --- a/app/models/publishing_api_document/publish.rb +++ b/app/models/publishing_api_document/publish.rb @@ -12,7 +12,7 @@ class Publish < Base $.details.body $.details.need_to_know $.details.more_information - ].map { JsonPath.new(_1) }.freeze + ].map { JsonPath.new(_1, use_symbols: true) }.freeze INDEXABLE_CONTENT_SEPARATOR = "\n".freeze # All the possible keys in the message hash that can contain additional keywords or other text @@ -33,7 +33,7 @@ class Publish < Base $.details.metadata.tribunal_decision_landmark_name $.details.acronym $.details.attachments[*]['title','isbn','unique_reference','command_paper_number','hoc_paper_number'] - ].map { JsonPath.new(_1) }.freeze + ].map { JsonPath.new(_1, use_symbols: true) }.freeze ADDITIONAL_SEARCHABLE_TEXT_VALUES_SEPARATOR = "\n".freeze # Synchronize the document to the given service (i.e. create or update it remotely) @@ -44,21 +44,21 @@ def synchronize(service: DiscoveryEngine::Put.new) # Extracts a hash of structured metadata about this document. def metadata { - content_id: document_hash["content_id"], - title: document_hash["title"], - description: document_hash["description"], + content_id: document_hash[:content_id], + title: document_hash[:title], + description: document_hash[:description], additional_searchable_text:, link:, url:, public_timestamp:, - document_type: document_hash["document_type"], - content_purpose_supergroup: document_hash["content_purpose_supergroup"], - part_of_taxonomy_tree: document_hash.dig("links", "taxons") || [], + document_type: document_hash[:document_type], + content_purpose_supergroup: document_hash[:content_purpose_supergroup], + part_of_taxonomy_tree: document_hash.dig(:links, :taxons) || [], # Vertex can only currently boost on numeric fields, not booleans is_historic: historic? ? 1 : 0, government_name:, organisation_state:, - locale: document_hash["locale"], + locale: document_hash[:locale], parts:, }.compact end @@ -66,10 +66,10 @@ def metadata # Extracts a single string of indexable unstructured content from the document. def content values_from_json_paths = INDEXABLE_CONTENT_VALUES_JSON_PATHS.map { _1.on(document_hash) } - values_from_parts = document_hash.dig("details", "parts")&.map do + values_from_parts = document_hash.dig(:details, :parts)&.map do # Add the part title as a heading to help the search model better understand the structure # of the content - ["

#{_1['title']}

", ContentWithMultipleTypes.new(_1["body"]).html_content] + ["

#{_1[:title]}

", ContentWithMultipleTypes.new(_1[:body]).html_content] end [ @@ -81,7 +81,7 @@ def content private def link - document_hash["base_path"].presence || document_hash.dig("details", "url") + document_hash[:base_path].presence || document_hash.dig(:details, :url) end def link_relative? @@ -103,40 +103,40 @@ def additional_searchable_text end def public_timestamp - return nil unless document_hash["public_updated_at"] + return nil unless document_hash[:public_updated_at] # rubocop:disable Rails/TimeZone (string already contains timezone info which would be lost) - Time.parse(document_hash["public_updated_at"]).to_i + Time.parse(document_hash[:public_updated_at]).to_i # rubocop:enable Rails/TimeZone end def historic? - political = document_hash.dig("details", "political") || false - government = document_hash.dig("expanded_links", "government")&.first + political = document_hash.dig(:details, :political) || false + government = document_hash.dig(:expanded_links, :government)&.first - political && government&.dig("details", "current") == false + political && government&.dig(:details, :current) == false end def government_name document_hash - .dig("expanded_links", "government") + .dig(:expanded_links, :government) &.first - &.dig("title") + &.dig(:title) end def organisation_state document_hash - .dig("details", "organisation_govuk_status", "status") + .dig(:details, :organisation_govuk_status, :status) end def parts document_hash - .dig("details", "parts") + .dig(:details, :parts) &.map do { - slug: _1["slug"], - title: _1["title"], - body: ContentWithMultipleTypes.new(_1["body"]).summarized_text_content, + slug: _1[:slug], + title: _1[:title], + body: ContentWithMultipleTypes.new(_1[:body]).summarized_text_content, } end end diff --git a/spec/integration/metadata_schema_compliance_spec.rb b/spec/integration/metadata_schema_compliance_spec.rb index 7f77282..0653034 100644 --- a/spec/integration/metadata_schema_compliance_spec.rb +++ b/spec/integration/metadata_schema_compliance_spec.rb @@ -10,7 +10,7 @@ ].each do |message_fixture| context "when processing a '#{message_fixture}'" do let(:document_hash) { json_fixture_as_hash("message_queue/#{message_fixture}.json") } - let(:document) { PublishingApiDocument::Publish.new(document_hash) } + let(:document) { PublishingApiDocument::Publish.new(document_hash.deep_symbolize_keys) } it "results in a document validating against the datastore schema" do expect(document.metadata).to match_json_schema(metadata_json_schema) diff --git a/spec/message_processors/publishing_api_message_processor_spec.rb b/spec/message_processors/publishing_api_message_processor_spec.rb index d559897..a5e1a99 100644 --- a/spec/message_processors/publishing_api_message_processor_spec.rb +++ b/spec/message_processors/publishing_api_message_processor_spec.rb @@ -9,7 +9,7 @@ describe "when receiving an incoming message" do let(:message) { GovukMessageQueueConsumer::MockMessage.new(payload) } - let(:payload) { { "I am" => "a message" } } + let(:payload) { { "I am": "a message" } } let(:logger) { instance_double(Logger, info: nil, error: nil) } before do @@ -47,7 +47,7 @@ expect(logger).to have_received(:error).with(<<~MSG) Failed to process incoming document message: RuntimeError: Could not process - Message content: {\"I am\"=>\"a message\"} + Message content: {:\"I am\"=>\"a message\"} MSG end @@ -78,7 +78,7 @@ expect(logger).to have_received(:error).with(<<~MSG) Failed to process incoming document message: RuntimeError: Could not synchronize - Message content: {\"I am\"=>\"a message\"} + Message content: {:\"I am\"=>\"a message\"} MSG end diff --git a/spec/models/publishing_api_document/ignore_spec.rb b/spec/models/publishing_api_document/ignore_spec.rb index 43c9441..11ad60a 100644 --- a/spec/models/publishing_api_document/ignore_spec.rb +++ b/spec/models/publishing_api_document/ignore_spec.rb @@ -6,9 +6,9 @@ let(:document_type) { "ignored" } let(:document_hash) do { - "content_id" => content_id, - "payload_version" => payload_version, - "document_type" => document_type, + content_id:, + payload_version:, + document_type:, } end diff --git a/spec/models/publishing_api_document/publish_spec.rb b/spec/models/publishing_api_document/publish_spec.rb index 88ee87a..6f1a58c 100644 --- a/spec/models/publishing_api_document/publish_spec.rb +++ b/spec/models/publishing_api_document/publish_spec.rb @@ -6,9 +6,9 @@ let(:document_type) { "press_release" } let(:document_hash) do { - "content_id" => content_id, - "payload_version" => payload_version, - "document_type" => document_type, + content_id:, + payload_version:, + document_type:, } end @@ -30,15 +30,15 @@ describe "with basic top-level fields" do let(:document_hash) do { - "details" => { - "description" => "a", - "introduction" => "b", - "introductory_paragraph" => "c", - "title" => "d", - "summary" => "e", - "body" => "f", - "need_to_know" => "g", - "more_information" => "h", + details: { + description: "a", + introduction: "b", + introductory_paragraph: "c", + title: "d", + summary: "e", + body: "f", + need_to_know: "g", + more_information: "h", }, } end @@ -49,11 +49,11 @@ describe "with contact groups" do let(:document_hash) do { - "details" => { - "contact_groups" => [ - { "title" => "x" }, - { "title" => "y" }, - { "title" => "z" }, + details: { + contact_groups: [ + { title: "x" }, + { title: "y" }, + { title: "z" }, ], }, } @@ -65,25 +65,25 @@ describe "with parts" do let(:document_hash) do { - "details" => { - "parts" => [ + details: { + parts: [ { - "title" => "Foo", - "slug" => "/foo", - "body" => [ + title: "Foo", + slug: "/foo", + body: [ { - "content" => "bar", - "content_type" => "text/html", + content: "bar", + content_type: "text/html", }, ], }, { - "title" => "Bar", - "slug" => "/bar", - "body" => [ + title: "Bar", + slug: "/bar", + body: [ { - "content" => "baz", - "content_type" => "text/html", + content: "baz", + content_type: "text/html", }, ], }, @@ -98,7 +98,7 @@ describe "without any fields" do let(:document_hash) do { - "details" => {}, + details: {}, } end @@ -110,7 +110,7 @@ describe "content_id" do subject(:extracted_content_id) { document.metadata[:content_id] } - let(:document_hash) { { "content_id" => "000-000-000" } } + let(:document_hash) { { content_id: "000-000-000" } } it { is_expected.to eq("000-000-000") } end @@ -118,7 +118,7 @@ describe "title" do subject(:extracted_title) { document.metadata[:title] } - let(:document_hash) { { "title" => "Hello world" } } + let(:document_hash) { { title: "Hello world" } } it { is_expected.to eq("Hello world") } end @@ -126,7 +126,7 @@ describe "description" do subject(:extracted_description) { document.metadata[:description] } - let(:document_hash) { { "description" => "Lorem ipsum dolor sit amet." } } + let(:document_hash) { { description: "Lorem ipsum dolor sit amet." } } it { is_expected.to eq("Lorem ipsum dolor sit amet.") } end @@ -137,8 +137,8 @@ describe "with hidden search terms" do let(:document_hash) do { - "details" => { - "hidden_search_terms" => "a b c", + details: { + hidden_search_terms: "a b c", }, } end @@ -149,9 +149,9 @@ describe "with hidden indexable content as an array" do let(:document_hash) do { - "details" => { - "metadata" => { - "hidden_indexable_content" => %w[x y z], + details: { + metadata: { + hidden_indexable_content: %w[x y z], }, }, } @@ -163,9 +163,9 @@ describe "with hidden indexable content as a string" do let(:document_hash) do { - "details" => { - "metadata" => { - "hidden_indexable_content" => "x y z", + details: { + metadata: { + hidden_indexable_content: "x y z", }, }, } @@ -177,9 +177,9 @@ describe "with a project code" do let(:document_hash) do { - "details" => { - "metadata" => { - "project_code" => "PRINCE2", + details: { + metadata: { + project_code: "PRINCE2", }, }, } @@ -191,8 +191,8 @@ describe "with an acronym" do let(:document_hash) do { - "details" => { - "acronym" => "LOL", + details: { + acronym: "LOL", }, } end @@ -203,10 +203,10 @@ describe "with a registration and aircraft type" do let(:document_hash) do { - "details" => { - "metadata" => { - "registration" => "G-CIVY", - "aircraft_type" => "Boeing 747-436", + details: { + metadata: { + registration: "G-CIVY", + aircraft_type: "Boeing 747-436", }, }, } @@ -218,15 +218,15 @@ describe "with tribunal decision details" do let(:document_hash) do { - "details" => { - "metadata" => { - "tribunal_decision_categories_name" => "A", - "tribunal_decision_country_name" => "B", - "tribunal_decision_judges_name" => "C", - "tribunal_decision_category_name" => "D", - "tribunal_decision_sub_category_name" => "E", - "tribunal_decision_sub_categories_name" => "F", - "tribunal_decision_landmark_name" => "G", + details: { + metadata: { + tribunal_decision_categories_name: "A", + tribunal_decision_country_name: "B", + tribunal_decision_judges_name: "C", + tribunal_decision_category_name: "D", + tribunal_decision_sub_category_name: "E", + tribunal_decision_sub_categories_name: "F", + tribunal_decision_landmark_name: "G", }, }, } @@ -238,19 +238,19 @@ describe "with attachments" do let(:document_hash) do { - "details" => { - "attachments" => [ + details: { + attachments: [ { - "title" => "A report", - "isbn" => "1234567890123", - "unique_reference" => "ABCDEF", - "command_paper_number" => "", + title: "A report", + isbn: "1234567890123", + unique_reference: "ABCDEF", + command_paper_number: "", }, { - "title" => "Another report", - "isbn" => "", - "command_paper_number" => "CPN1234", - "hoc_paper_number" => "ADHOC", + title: "Another report", + isbn: "", + command_paper_number: "CPN1234", + hoc_paper_number: "ADHOC", }, ], }, @@ -265,20 +265,20 @@ subject(:extracted_link) { document.metadata[:link] } context "with a base_path" do - let(:document_hash) { { "base_path" => "/test" } } + let(:document_hash) { { base_path: "/test" } } it { is_expected.to eq("/test") } end context "with an external URL" do - let(:document_hash) { { "details" => { "url" => "https://liverpool.gov.uk/" } } } + let(:document_hash) { { details: { url: "https://liverpool.gov.uk/" } } } it { is_expected.to eq("https://liverpool.gov.uk/") } end context "with both a base_path and an external URL" do let(:document_hash) do - { "base_path" => "/test", "details" => { "url" => "https://liverpool.gov.uk/" } } + { base_path: "/test", details: { url: "https://liverpool.gov.uk/" } } end it { is_expected.to eq("/test") } @@ -301,20 +301,20 @@ end context "with a base_path" do - let(:document_hash) { { "base_path" => "/test" } } + let(:document_hash) { { base_path: "/test" } } it { is_expected.to eq("https://test.gov.uk/test") } end context "with an external URL" do - let(:document_hash) { { "details" => { "url" => "https://liverpool.gov.uk/" } } } + let(:document_hash) { { details: { url: "https://liverpool.gov.uk/" } } } it { is_expected.to eq("https://liverpool.gov.uk/") } end context "with both a base_path and an external URL" do let(:document_hash) do - { "base_path" => "/test", "details" => { "url" => "https://liverpool.gov.uk/" } } + { base_path: "/test", details: { url: "https://liverpool.gov.uk/" } } end it { is_expected.to eq("https://test.gov.uk/test") } @@ -330,7 +330,7 @@ describe "public_timestamp" do subject(:extracted_public_timestamp) { document.metadata[:public_timestamp] } - let(:document_hash) { { "public_updated_at" => "2012-02-01T00:00:00Z" } } + let(:document_hash) { { public_updated_at: "2012-02-01T00:00:00Z" } } it { is_expected.to eq(1_328_054_400) } @@ -344,7 +344,7 @@ describe "document_type" do subject(:extracted_document_type) { document.metadata[:document_type] } - let(:document_hash) { { "document_type" => "foo_bar" } } + let(:document_hash) { { document_type: "foo_bar" } } it { is_expected.to eq("foo_bar") } end @@ -352,7 +352,7 @@ describe "content_purpose_supergroup" do subject(:extracted_content_purpose_supergroup) { document.metadata[:content_purpose_supergroup] } - let(:document_hash) { { "content_purpose_supergroup" => "foo_bar" } } + let(:document_hash) { { content_purpose_supergroup: "foo_bar" } } it { is_expected.to eq("foo_bar") } end @@ -361,7 +361,7 @@ subject(:extracted_part_of_taxonomy_tree) { document.metadata[:part_of_taxonomy_tree] } context "with a set of taxon links" do - let(:document_hash) { { "links" => { "taxons" => %w[0000 ffff] } } } + let(:document_hash) { { links: { taxons: %w[0000 ffff] } } } it { is_expected.to eq(%w[0000 ffff]) } end @@ -377,7 +377,7 @@ subject(:extracted_is_historic) { document.metadata[:is_historic] } context "when the document is non-political" do - let(:document_hash) { { "details" => {} } } + let(:document_hash) { { details: {} } } it { is_expected.to eq(0) } end @@ -385,8 +385,8 @@ context "when the document is political" do let(:document_hash) do { - "details" => { "political" => true }, - "expanded_links" => expanded_links, + details: { political: true }, + expanded_links:, } end @@ -397,13 +397,13 @@ end context "with a link to the current government" do - let(:expanded_links) { { "government" => [{ "details" => { "current" => true } }] } } + let(:expanded_links) { { government: [{ details: { current: true } }] } } it { is_expected.to eq(0) } end context "with a link to a previous government" do - let(:expanded_links) { { "government" => [{ "details" => { "current" => false } }] } } + let(:expanded_links) { { government: [{ details: { current: false } }] } } it { is_expected.to eq(1) } end @@ -413,7 +413,7 @@ describe "government_name" do subject(:extracted_government_name) { document.metadata[:government_name] } - let(:document_hash) { { "expanded_links" => expanded_links } } + let(:document_hash) { { expanded_links: } } context "without link to a government" do let(:expanded_links) { {} } @@ -422,7 +422,7 @@ end context "with a link to a government" do - let(:expanded_links) { { "government" => [{ "title" => "2096 Something Party government" }] } } + let(:expanded_links) { { government: [{ title: "2096 Something Party government" }] } } it { is_expected.to eq("2096 Something Party government") } end @@ -431,7 +431,7 @@ describe "organisation_state" do subject(:extracted_organisation_state) { document.metadata[:organisation_state] } - let(:document_hash) { { "details" => details } } + let(:document_hash) { { details: } } context "without an organisation state" do let(:details) { {} } @@ -440,7 +440,7 @@ end context "with an organisation state" do - let(:details) { { "organisation_govuk_status" => { "status" => "blub" } } } + let(:details) { { organisation_govuk_status: { status: "blub" } } } it { is_expected.to eq("blub") } end @@ -449,7 +449,7 @@ describe "locale" do subject(:extracted_locale) { document.metadata[:locale] } - let(:document_hash) { { "locale" => "en" } } + let(:document_hash) { { locale: "en" } } it { is_expected.to eq("en") } end @@ -457,7 +457,7 @@ describe "parts" do subject(:extracted_parts) { document.metadata[:parts] } - let(:document_hash) { { "details" => { "parts" => parts } } } + let(:document_hash) { { details: { parts: } } } context "when the document has no parts" do let(:parts) { nil } @@ -469,26 +469,26 @@ let(:parts) do [ { - "title" => "Part 1", - "slug" => "/part-1", - "body" => [ + title: "Part 1", + slug: "/part-1", + body: [ { - "content" => "Part 1 body", - "content_type" => "text/simples", + content: "Part 1 body", + content_type: "text/simples", }, { - "content" => "
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur tincidunt sem erat, eget blandit urna porta ac. Mauris lobortis tincidunt dui at pharetra.
", - "content_type" => "text/html", + content: "
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur tincidunt sem erat, eget blandit urna porta ac. Mauris lobortis tincidunt dui at pharetra.
", + content_type: "text/html", }, ], }, { - "title" => "Part 2", - "slug" => "/part-2", - "body" => [ + title: "Part 2", + slug: "/part-2", + body: [ { - "content" => "I have no HTML content :(", - "content_type" => "text/simples", + content: "I have no HTML content :(", + content_type: "text/simples", }, ], }, diff --git a/spec/models/publishing_api_document/unpublish_spec.rb b/spec/models/publishing_api_document/unpublish_spec.rb index 351d890..811c541 100644 --- a/spec/models/publishing_api_document/unpublish_spec.rb +++ b/spec/models/publishing_api_document/unpublish_spec.rb @@ -6,9 +6,9 @@ let(:document_type) { "gone" } let(:document_hash) do { - "content_id" => content_id, - "payload_version" => payload_version, - "document_type" => document_type, + content_id:, + payload_version:, + document_type:, } end diff --git a/spec/models/publishing_api_document_spec.rb b/spec/models/publishing_api_document_spec.rb index cdb82da..f2c0d8d 100644 --- a/spec/models/publishing_api_document_spec.rb +++ b/spec/models/publishing_api_document_spec.rb @@ -4,9 +4,9 @@ let(:document_hash) do { - "document_type" => document_type, - "base_path" => base_path, - "locale" => locale, + document_type:, + base_path:, + locale:, } end let(:base_path) { "/base-path" }