Skip to content

Commit

Permalink
Merge pull request #34 from alphagov/document
Browse files Browse the repository at this point in the history
Refactor into simplified class structure
  • Loading branch information
csutter authored Oct 6, 2023
2 parents 55f7f62 + 15d0009 commit 06f32b9
Show file tree
Hide file tree
Showing 20 changed files with 475 additions and 452 deletions.
9 changes: 4 additions & 5 deletions lib/publishing_event_pipeline.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@
require "plek"

require "publishing_event_pipeline/configuration"
require "publishing_event_pipeline/document_event_mapper"
require "publishing_event_pipeline/message_processor"

require "publishing_event_pipeline/helpers/extract"

require "publishing_event_pipeline/events/publish"
require "publishing_event_pipeline/events/unpublish"
require "publishing_event_pipeline/extractors/content"
require "publishing_event_pipeline/extractors/metadata"
require "publishing_event_pipeline/document"
require "publishing_event_pipeline/document/base"
require "publishing_event_pipeline/document/publish"
require "publishing_event_pipeline/document/unpublish"

module PublishingEventPipeline
def self.configuration
Expand Down
13 changes: 13 additions & 0 deletions lib/publishing_event_pipeline/document.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
module PublishingEventPipeline
  module Document
    # Factory: builds the concrete Document for the given document hash. An Unpublish is returned
    # when that class reports that it handles the hash; every other hash becomes a Publish.
    def self.for(document_hash)
      document_class = Unpublish.handles?(document_hash) ? Unpublish : Publish
      document_class.new(document_hash)
    end
  end
end
29 changes: 29 additions & 0 deletions lib/publishing_event_pipeline/document/base.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
module PublishingEventPipeline
  module Document
    # Abstract parent of all documents that can be synchronized to a repository. It wraps the raw
    # document hash and exposes the fields every concrete document type needs.
    class Base
      # Wraps the given document hash; the hash is stored as-is, not copied.
      def initialize(document_hash)
        @document_hash = document_hash
      end

      # Synchronizes this document to the given repository. Concrete subclasses must override
      # this; the base implementation always raises NotImplementedError.
      def synchronize_to(repository)
        raise NotImplementedError, "You must use a concrete subclass of Document"
      end

      # The document's content ID (raises KeyError if the hash has no "content_id" key).
      def content_id = document_hash.fetch("content_id")

      # The document's payload version (raises KeyError if the hash has no "payload_version" key).
      def payload_version = document_hash.fetch("payload_version")

      attr_reader :document_hash
      # The raw hash is only for use by this class and its subclasses.
      private :document_hash
    end
  end
end
57 changes: 57 additions & 0 deletions lib/publishing_event_pipeline/document/publish.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
module PublishingEventPipeline
  module Document
    # A document that should be present in the repository: synchronizing it puts its extracted
    # metadata and content there.
    class Publish < Base
      # Paths (in JSONPath-like notation) into the document hash whose values hold content that
      # we want to index.
      INDEXABLE_CONTENT_VALUES_PATHS = %w[
        $.details.body
        $.details.contact_groups[*].title
        $.details.description
        $.details.hidden_search_terms
        $.details.introduction
        $.details.introductory_paragraph
        $.details.metadata.hidden_indexable_content[*]
        $.details.metadata.project_code
        $.details.more_information
        $.details.need_to_know
        $.details.parts[*]['title','body']
        $.details.summary
        $.details.title
      ].freeze

      include Helpers::Extract

      # Puts this document into the given repository, keyed by its content ID, together with its
      # metadata, content and payload version.
      def synchronize_to(repository)
        repository.put(content_id, metadata, content:, payload_version:)
      end

      # Builds the hash of structured metadata for this document.
      def metadata
        # The link comes from the base path, falling back to the URL in the details.
        link = extract_first(document_hash, %w[$.base_path $.details.url])
        # Links starting with "/" are prefixed with the website root; any other value (including
        # nil) is used unchanged.
        url = link&.start_with?("/") ? Plek.website_root + link : link

        public_timestamp = extract_single(document_hash, "$.public_updated_at")
        # Integer form of the timestamp; stays nil when there is no timestamp to parse.
        public_timestamp_int = public_timestamp ? Time.zone.parse(public_timestamp).to_i : nil

        {
          content_id: extract_single(document_hash, "$.content_id"),
          document_type: extract_single(document_hash, "$.document_type"),
          title: extract_single(document_hash, "$.title"),
          description: extract_single(document_hash, "$.description"),
          link:,
          url:,
          public_timestamp:,
          public_timestamp_int:,
        }
      end

      # Extracts the indexable unstructured content of the document by applying every path in
      # INDEXABLE_CONTENT_VALUES_PATHS to the document hash.
      def content = extract_all(document_hash, INDEXABLE_CONTENT_VALUES_PATHS)
    end
  end
end
20 changes: 20 additions & 0 deletions lib/publishing_event_pipeline/document/unpublish.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
module PublishingEventPipeline
  module Document
    # A document that has been unpublished: synchronizing it deletes it from the repository.
    class Unpublish < Base
      # Document types that the source system assigns to a document once it is unpublished. Other
      # systems distinguish between them; here they all simply mean "remove from search".
      UNPUBLISH_DOCUMENT_TYPES = %w[gone redirect substitute vanish].freeze

      # Whether this class is the right one for the given document hash, i.e. whether its
      # "document_type" is one of UNPUBLISH_DOCUMENT_TYPES. Raises KeyError if the key is absent.
      def self.handles?(document_hash)
        document_type = document_hash.fetch("document_type")
        UNPUBLISH_DOCUMENT_TYPES.include?(document_type)
      end

      # Deletes this document from the given repository, identified by its content ID and
      # qualified by its payload version.
      def synchronize_to(repository)
        repository.delete(content_id, payload_version:)
      end
    end
  end
end
39 changes: 0 additions & 39 deletions lib/publishing_event_pipeline/document_event_mapper.rb

This file was deleted.

18 changes: 0 additions & 18 deletions lib/publishing_event_pipeline/events/publish.rb

This file was deleted.

16 changes: 0 additions & 16 deletions lib/publishing_event_pipeline/events/unpublish.rb

This file was deleted.

29 changes: 0 additions & 29 deletions lib/publishing_event_pipeline/extractors/content.rb

This file was deleted.

29 changes: 0 additions & 29 deletions lib/publishing_event_pipeline/extractors/metadata.rb

This file was deleted.

12 changes: 4 additions & 8 deletions lib/publishing_event_pipeline/message_processor.rb
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
module PublishingEventPipeline
# Processes incoming content changes from the publishing message queue.
class MessageProcessor
attr_reader :document_event_mapper, :repository
attr_reader :repository

def initialize(
repository:,
document_event_mapper: DocumentEventMapper.new
)
def initialize(repository:)
@repository = repository
@document_event_mapper = document_event_mapper
end

# Implements the callback interface required by `govuk_message_queue_consumer`
def process(message)
event = document_event_mapper.call(message.payload)
event.synchronize_to(repository)
document = Document.for(message.payload)
document.synchronize_to(repository)

message.ack
end
Expand Down
Loading

0 comments on commit 06f32b9

Please sign in to comment.