Skip to content

Commit

Permalink
Symbolize keys for PublishingApiDocument
Browse files Browse the repository at this point in the history
This lets us write somewhat neater, Rubyish code and rely on keys being
symbols everywhere.
  • Loading branch information
csutter committed Nov 3, 2023
1 parent db841df commit 7bc4bd8
Show file tree
Hide file tree
Showing 11 changed files with 147 additions and 146 deletions.
3 changes: 2 additions & 1 deletion app/message_processors/publishing_api_message_processor.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
class PublishingApiMessageProcessor
# Implements the callback interface required by `govuk_message_queue_consumer`
def process(message)
document = PublishingApiDocument.for(message.payload)
document_hash = message.payload.deep_symbolize_keys
document = PublishingApiDocument.for(document_hash)
document.synchronize

message.ack
Expand Down
6 changes: 3 additions & 3 deletions app/models/publishing_api_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ module PublishingApiDocument
# Factory method returning a Document instance of an appropriate concrete type for the given
# document hash.
def self.for(document_hash)
case document_hash["document_type"]
case document_hash[:document_type]
when *UNPUBLISH_DOCUMENT_TYPES
Unpublish.new(document_hash)
when *Rails.configuration.document_type_ignorelist
return Publish.new(document_hash) if force_add_path?(document_hash["base_path"])
return Publish.new(document_hash) if force_add_path?(document_hash[:base_path])

Ignore.new(document_hash)
else
return Ignore.new(document_hash) unless document_hash["locale"].in?(["en", nil])
return Ignore.new(document_hash) unless document_hash[:locale].in?(["en", nil])

Publish.new(document_hash)
end
Expand Down
4 changes: 2 additions & 2 deletions app/models/publishing_api_document/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ def synchronize(service: nil)

# The content ID of the document.
def content_id
document_hash.fetch("content_id")
document_hash.fetch(:content_id)
end

# The payload version of the document.
def payload_version
document_hash.fetch("payload_version").to_i
document_hash.fetch(:payload_version).to_i
end

private
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ def initialize(content_with_multiple_types)
end

def html_content
@content_with_multiple_types.find { _1["content_type"] == "text/html" }&.dig("content")
@content_with_multiple_types.find { _1[:content_type] == "text/html" }&.dig(:content)
end

def text_content
Expand Down
48 changes: 24 additions & 24 deletions app/models/publishing_api_document/publish.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class Publish < Base
$.details.body
$.details.need_to_know
$.details.more_information
].map { JsonPath.new(_1) }.freeze
].map { JsonPath.new(_1, use_symbols: true) }.freeze
INDEXABLE_CONTENT_SEPARATOR = "\n".freeze

# All the possible keys in the message hash that can contain additional keywords or other text
Expand All @@ -33,7 +33,7 @@ class Publish < Base
$.details.metadata.tribunal_decision_landmark_name
$.details.acronym
$.details.attachments[*]['title','isbn','unique_reference','command_paper_number','hoc_paper_number']
].map { JsonPath.new(_1) }.freeze
].map { JsonPath.new(_1, use_symbols: true) }.freeze
ADDITIONAL_SEARCHABLE_TEXT_VALUES_SEPARATOR = "\n".freeze

# Synchronize the document to the given service (i.e. create or update it remotely)
Expand All @@ -44,32 +44,32 @@ def synchronize(service: DiscoveryEngine::Put.new)
# Extracts a hash of structured metadata about this document.
def metadata
{
content_id: document_hash["content_id"],
title: document_hash["title"],
description: document_hash["description"],
content_id: document_hash[:content_id],
title: document_hash[:title],
description: document_hash[:description],
additional_searchable_text:,
link:,
url:,
public_timestamp:,
document_type: document_hash["document_type"],
content_purpose_supergroup: document_hash["content_purpose_supergroup"],
part_of_taxonomy_tree: document_hash.dig("links", "taxons") || [],
document_type: document_hash[:document_type],
content_purpose_supergroup: document_hash[:content_purpose_supergroup],
part_of_taxonomy_tree: document_hash.dig(:links, :taxons) || [],
# Vertex can only currently boost on numeric fields, not booleans
is_historic: historic? ? 1 : 0,
government_name:,
organisation_state:,
locale: document_hash["locale"],
locale: document_hash[:locale],
parts:,
}.compact
end

# Extracts a single string of indexable unstructured content from the document.
def content
values_from_json_paths = INDEXABLE_CONTENT_VALUES_JSON_PATHS.map { _1.on(document_hash) }
values_from_parts = document_hash.dig("details", "parts")&.map do
values_from_parts = document_hash.dig(:details, :parts)&.map do
# Add the part title as a heading to help the search model better understand the structure
# of the content
["<h1>#{_1['title']}</h1>", ContentWithMultipleTypes.new(_1["body"]).html_content]
["<h1>#{_1[:title]}</h1>", ContentWithMultipleTypes.new(_1[:body]).html_content]
end

[
Expand All @@ -81,7 +81,7 @@ def content
private

def link
document_hash["base_path"].presence || document_hash.dig("details", "url")
document_hash[:base_path].presence || document_hash.dig(:details, :url)
end

def link_relative?
Expand All @@ -103,40 +103,40 @@ def additional_searchable_text
end

def public_timestamp
return nil unless document_hash["public_updated_at"]
return nil unless document_hash[:public_updated_at]

# rubocop:disable Rails/TimeZone (string already contains timezone info which would be lost)
Time.parse(document_hash["public_updated_at"]).to_i
Time.parse(document_hash[:public_updated_at]).to_i
# rubocop:enable Rails/TimeZone
end

def historic?
political = document_hash.dig("details", "political") || false
government = document_hash.dig("expanded_links", "government")&.first
political = document_hash.dig(:details, :political) || false
government = document_hash.dig(:expanded_links, :government)&.first

political && government&.dig("details", "current") == false
political && government&.dig(:details, :current) == false
end

def government_name
document_hash
.dig("expanded_links", "government")
.dig(:expanded_links, :government)
&.first
&.dig("title")
&.dig(:title)
end

def organisation_state
document_hash
.dig("details", "organisation_govuk_status", "status")
.dig(:details, :organisation_govuk_status, :status)
end

def parts
document_hash
.dig("details", "parts")
.dig(:details, :parts)
&.map do
{
slug: _1["slug"],
title: _1["title"],
body: ContentWithMultipleTypes.new(_1["body"]).summarized_text_content,
slug: _1[:slug],
title: _1[:title],
body: ContentWithMultipleTypes.new(_1[:body]).summarized_text_content,
}
end
end
Expand Down
2 changes: 1 addition & 1 deletion spec/integration/metadata_schema_compliance_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
].each do |message_fixture|
context "when processing a '#{message_fixture}'" do
let(:document_hash) { json_fixture_as_hash("message_queue/#{message_fixture}.json") }
let(:document) { PublishingApiDocument::Publish.new(document_hash) }
let(:document) { PublishingApiDocument::Publish.new(document_hash.deep_symbolize_keys) }

it "results in a document validating against the datastore schema" do
expect(document.metadata).to match_json_schema(metadata_json_schema)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

describe "when receiving an incoming message" do
let(:message) { GovukMessageQueueConsumer::MockMessage.new(payload) }
let(:payload) { { "I am" => "a message" } }
let(:payload) { { "I am": "a message" } }
let(:logger) { instance_double(Logger, info: nil, error: nil) }

before do
Expand Down Expand Up @@ -47,7 +47,7 @@
expect(logger).to have_received(:error).with(<<~MSG)
Failed to process incoming document message:
RuntimeError: Could not process
Message content: {\"I am\"=>\"a message\"}
Message content: {:\"I am\"=>\"a message\"}
MSG
end

Expand Down Expand Up @@ -78,7 +78,7 @@
expect(logger).to have_received(:error).with(<<~MSG)
Failed to process incoming document message:
RuntimeError: Could not synchronize
Message content: {\"I am\"=>\"a message\"}
Message content: {:\"I am\"=>\"a message\"}
MSG
end

Expand Down
6 changes: 3 additions & 3 deletions spec/models/publishing_api_document/ignore_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
let(:document_type) { "ignored" }
let(:document_hash) do
{
"content_id" => content_id,
"payload_version" => payload_version,
"document_type" => document_type,
content_id:,
payload_version:,
document_type:,
}
end

Expand Down
Loading

0 comments on commit 7bc4bd8

Please sign in to comment.