Skip to content

Commit

Permalink
Merge pull request #95 from alphagov/symbolize-keys
Browse files Browse the repository at this point in the history
Symbolize keys for `PublishingApiDocument`
  • Loading branch information
csutter authored Nov 3, 2023
2 parents db841df + 7bc4bd8 commit 2bb645c
Show file tree
Hide file tree
Showing 11 changed files with 147 additions and 146 deletions.
3 changes: 2 additions & 1 deletion app/message_processors/publishing_api_message_processor.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
class PublishingApiMessageProcessor
# Implements the callback interface required by `govuk_message_queue_consumer`
def process(message)
document = PublishingApiDocument.for(message.payload)
document_hash = message.payload.deep_symbolize_keys
document = PublishingApiDocument.for(document_hash)
document.synchronize

message.ack
Expand Down
6 changes: 3 additions & 3 deletions app/models/publishing_api_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ module PublishingApiDocument
# Factory method returning a Document instance of an appropriate concrete type for the given
# document hash.
def self.for(document_hash)
case document_hash["document_type"]
case document_hash[:document_type]
when *UNPUBLISH_DOCUMENT_TYPES
Unpublish.new(document_hash)
when *Rails.configuration.document_type_ignorelist
return Publish.new(document_hash) if force_add_path?(document_hash["base_path"])
return Publish.new(document_hash) if force_add_path?(document_hash[:base_path])

Ignore.new(document_hash)
else
return Ignore.new(document_hash) unless document_hash["locale"].in?(["en", nil])
return Ignore.new(document_hash) unless document_hash[:locale].in?(["en", nil])

Publish.new(document_hash)
end
Expand Down
4 changes: 2 additions & 2 deletions app/models/publishing_api_document/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ def synchronize(service: nil)

# The content ID of the document.
def content_id
document_hash.fetch("content_id")
document_hash.fetch(:content_id)
end

# The payload version of the document.
def payload_version
document_hash.fetch("payload_version").to_i
document_hash.fetch(:payload_version).to_i
end

private
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ def initialize(content_with_multiple_types)
end

def html_content
@content_with_multiple_types.find { _1["content_type"] == "text/html" }&.dig("content")
@content_with_multiple_types.find { _1[:content_type] == "text/html" }&.dig(:content)
end

def text_content
Expand Down
48 changes: 24 additions & 24 deletions app/models/publishing_api_document/publish.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class Publish < Base
$.details.body
$.details.need_to_know
$.details.more_information
].map { JsonPath.new(_1) }.freeze
].map { JsonPath.new(_1, use_symbols: true) }.freeze
INDEXABLE_CONTENT_SEPARATOR = "\n".freeze

# All the possible keys in the message hash that can contain additional keywords or other text
Expand All @@ -33,7 +33,7 @@ class Publish < Base
$.details.metadata.tribunal_decision_landmark_name
$.details.acronym
$.details.attachments[*]['title','isbn','unique_reference','command_paper_number','hoc_paper_number']
].map { JsonPath.new(_1) }.freeze
].map { JsonPath.new(_1, use_symbols: true) }.freeze
ADDITIONAL_SEARCHABLE_TEXT_VALUES_SEPARATOR = "\n".freeze

# Synchronize the document to the given service (i.e. create or update it remotely)
Expand All @@ -44,32 +44,32 @@ def synchronize(service: DiscoveryEngine::Put.new)
# Extracts a hash of structured metadata about this document.
def metadata
{
content_id: document_hash["content_id"],
title: document_hash["title"],
description: document_hash["description"],
content_id: document_hash[:content_id],
title: document_hash[:title],
description: document_hash[:description],
additional_searchable_text:,
link:,
url:,
public_timestamp:,
document_type: document_hash["document_type"],
content_purpose_supergroup: document_hash["content_purpose_supergroup"],
part_of_taxonomy_tree: document_hash.dig("links", "taxons") || [],
document_type: document_hash[:document_type],
content_purpose_supergroup: document_hash[:content_purpose_supergroup],
part_of_taxonomy_tree: document_hash.dig(:links, :taxons) || [],
# Vertex can only currently boost on numeric fields, not booleans
is_historic: historic? ? 1 : 0,
government_name:,
organisation_state:,
locale: document_hash["locale"],
locale: document_hash[:locale],
parts:,
}.compact
end

# Extracts a single string of indexable unstructured content from the document.
def content
values_from_json_paths = INDEXABLE_CONTENT_VALUES_JSON_PATHS.map { _1.on(document_hash) }
values_from_parts = document_hash.dig("details", "parts")&.map do
values_from_parts = document_hash.dig(:details, :parts)&.map do
# Add the part title as a heading to help the search model better understand the structure
# of the content
["<h1>#{_1['title']}</h1>", ContentWithMultipleTypes.new(_1["body"]).html_content]
["<h1>#{_1[:title]}</h1>", ContentWithMultipleTypes.new(_1[:body]).html_content]
end

[
Expand All @@ -81,7 +81,7 @@ def content
private

def link
document_hash["base_path"].presence || document_hash.dig("details", "url")
document_hash[:base_path].presence || document_hash.dig(:details, :url)
end

def link_relative?
Expand All @@ -103,40 +103,40 @@ def additional_searchable_text
end

def public_timestamp
return nil unless document_hash["public_updated_at"]
return nil unless document_hash[:public_updated_at]

# rubocop:disable Rails/TimeZone (string already contains timezone info which would be lost)
Time.parse(document_hash["public_updated_at"]).to_i
Time.parse(document_hash[:public_updated_at]).to_i
# rubocop:enable Rails/TimeZone
end

def historic?
political = document_hash.dig("details", "political") || false
government = document_hash.dig("expanded_links", "government")&.first
political = document_hash.dig(:details, :political) || false
government = document_hash.dig(:expanded_links, :government)&.first

political && government&.dig("details", "current") == false
political && government&.dig(:details, :current) == false
end

def government_name
document_hash
.dig("expanded_links", "government")
.dig(:expanded_links, :government)
&.first
&.dig("title")
&.dig(:title)
end

def organisation_state
document_hash
.dig("details", "organisation_govuk_status", "status")
.dig(:details, :organisation_govuk_status, :status)
end

def parts
document_hash
.dig("details", "parts")
.dig(:details, :parts)
&.map do
{
slug: _1["slug"],
title: _1["title"],
body: ContentWithMultipleTypes.new(_1["body"]).summarized_text_content,
slug: _1[:slug],
title: _1[:title],
body: ContentWithMultipleTypes.new(_1[:body]).summarized_text_content,
}
end
end
Expand Down
2 changes: 1 addition & 1 deletion spec/integration/metadata_schema_compliance_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
].each do |message_fixture|
context "when processing a '#{message_fixture}'" do
let(:document_hash) { json_fixture_as_hash("message_queue/#{message_fixture}.json") }
let(:document) { PublishingApiDocument::Publish.new(document_hash) }
let(:document) { PublishingApiDocument::Publish.new(document_hash.deep_symbolize_keys) }

it "results in a document validating against the datastore schema" do
expect(document.metadata).to match_json_schema(metadata_json_schema)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

describe "when receiving an incoming message" do
let(:message) { GovukMessageQueueConsumer::MockMessage.new(payload) }
let(:payload) { { "I am" => "a message" } }
let(:payload) { { "I am": "a message" } }
let(:logger) { instance_double(Logger, info: nil, error: nil) }

before do
Expand Down Expand Up @@ -47,7 +47,7 @@
expect(logger).to have_received(:error).with(<<~MSG)
Failed to process incoming document message:
RuntimeError: Could not process
Message content: {\"I am\"=>\"a message\"}
Message content: {:\"I am\"=>\"a message\"}
MSG
end

Expand Down Expand Up @@ -78,7 +78,7 @@
expect(logger).to have_received(:error).with(<<~MSG)
Failed to process incoming document message:
RuntimeError: Could not synchronize
Message content: {\"I am\"=>\"a message\"}
Message content: {:\"I am\"=>\"a message\"}
MSG
end

Expand Down
6 changes: 3 additions & 3 deletions spec/models/publishing_api_document/ignore_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
let(:document_type) { "ignored" }
let(:document_hash) do
{
"content_id" => content_id,
"payload_version" => payload_version,
"document_type" => document_type,
content_id:,
payload_version:,
document_type:,
}
end

Expand Down
Loading

0 comments on commit 2bb645c

Please sign in to comment.