From b646c828e7efe34a188845836b12b899efa8338b Mon Sep 17 00:00:00 2001 From: Minno Dang Date: Mon, 14 Oct 2024 19:32:41 +0100 Subject: [PATCH] WIP - trying to add classes for the new specialist-finder index --- config/schema/indexes/specialist-finder.json | 48 +++ elasticsearch.yml | 2 + .../presenters/common_fields_presenter.rb | 1 - .../presenters/elasticsearch_presenter.rb | 151 ---------- lib/index/elasticsearch_processor.rb | 4 + lib/rummager.rb | 7 +- lib/rummager/app.rb | 2 +- lib/search_config.rb | 1 + lib/specialist_finder_index/client.rb | 11 + .../document_type_mapper.rb | 17 ++ .../presenters/elasticsearch_presenter.rb | 277 ++++++++++++++++++ .../presenters/specialist_presenter.rb | 4 +- .../publishing_event_job.rb | 112 +++++++ .../publishing_event_processor.rb | 11 + .../specialist_formats_spec.rb | 19 +- spec/support/index_helpers.rb | 4 +- .../specialist_formats_spec.rb | 4 +- 17 files changed, 503 insertions(+), 172 deletions(-) create mode 100644 config/schema/indexes/specialist-finder.json create mode 100644 lib/specialist_finder_index/client.rb create mode 100644 lib/specialist_finder_index/document_type_mapper.rb create mode 100644 lib/specialist_finder_index/presenters/elasticsearch_presenter.rb rename lib/{govuk_index => specialist_finder_index}/presenters/specialist_presenter.rb (99%) create mode 100644 lib/specialist_finder_index/publishing_event_job.rb create mode 100644 lib/specialist_finder_index/publishing_event_processor.rb rename spec/integration/{govuk_index => specialist_finder_index}/specialist_formats_spec.rb (78%) rename spec/unit/{govuk_index => specialist_finder_index}/specialist_formats_spec.rb (98%) diff --git a/config/schema/indexes/specialist-finder.json b/config/schema/indexes/specialist-finder.json new file mode 100644 index 000000000..471c6c73f --- /dev/null +++ b/config/schema/indexes/specialist-finder.json @@ -0,0 +1,48 @@ +{ + "elasticsearch_types": [ + "aaib_report", + "ai_assurance_portfolio_technique", + 
"algorithmic_transparency_record", + "animal_disease_case", + "asylum_support_decision", + "business_finance_support_scheme", + "cma_case", + "contact", + "countryside_stewardship_grant", + "drcf_digital_markets_research", + "drug_safety_update", + "edition", + "employment_appeal_tribunal_decision", + "employment_tribunal_decision", + "european_structural_investment_fund", + "export_health_certificate", + "farming_grant", + "flood_and_coastal_erosion_risk_management_research_report", + "hmrc_manual", + "hmrc_manual_section", + "international_development_fund", + "licence_transaction", + "life_saving_maritime_appliance_service_station", + "maib_report", + "manual", + "manual_section", + "marine_equipment_approved_recommendation", + "marine_notice", + "medical_safety_alert", + "person", + "policy", + "product_safety_alert_report_recall", + "protected_food_drink_name", + "raib_report", + "research_for_development_output", + "residential_property_tribunal_decision", + "traffic_commissioner_regulatory_decision", + "service_manual_guide", + "service_manual_topic", + "service_standard_report", + "statutory_instrument", + "tax_tribunal_decision", + "utaac_decision", + "veterans_support_organisation" + ] +} diff --git a/elasticsearch.yml b/elasticsearch.yml index d83dcd193..5e817aedc 100644 --- a/elasticsearch.yml +++ b/elasticsearch.yml @@ -2,6 +2,7 @@ production: &default base_uri: <%= ENV["ELASTICSEARCH_URI"] || 'http://localhost:9200' %> content_index_names: ["detailed", "government"] govuk_index_name: "govuk" + specialist_finder_index_name: "specialist-finder" auxiliary_index_names: ["page-traffic", "metasearch"] registry_index: "government" metasearch_index_name: "metasearch" @@ -23,6 +24,7 @@ test: base_uri: <%= ENV.fetch('ELASTICSEARCH_URI', 'http://localhost:9200') %> content_index_names: ["government_test"] govuk_index_name: "govuk_test" + specialist_finder_index_name: "specialist-finder_test" auxiliary_index_names: ["page-traffic_test", "metasearch_test"] 
registry_index: "government_test" metasearch_index_name: "metasearch_test" diff --git a/lib/govuk_index/presenters/common_fields_presenter.rb b/lib/govuk_index/presenters/common_fields_presenter.rb index f532ab6e0..ede465646 100644 --- a/lib/govuk_index/presenters/common_fields_presenter.rb +++ b/lib/govuk_index/presenters/common_fields_presenter.rb @@ -1,7 +1,6 @@ module GovukIndex class CommonFieldsPresenter CUSTOM_FORMAT_MAP = { - "esi_fund" => "european_structural_investment_fund", "external_content" => "recommended-link", "service_manual_homepage" => "service_manual_guide", "service_manual_service_standard" => "service_manual_guide", diff --git a/lib/govuk_index/presenters/elasticsearch_presenter.rb b/lib/govuk_index/presenters/elasticsearch_presenter.rb index 40b315684..b1446abbc 100644 --- a/lib/govuk_index/presenters/elasticsearch_presenter.rb +++ b/lib/govuk_index/presenters/elasticsearch_presenter.rb @@ -13,200 +13,53 @@ def type def document { - ai_assurance_technique: specialist.ai_assurance_technique, - aircraft_category: specialist.aircraft_category, - aircraft_type: specialist.aircraft_type, - alert_type: specialist.alert_type, - algorithmic_transparency_record_atrs_version: specialist.algorithmic_transparency_record_atrs_version, - algorithmic_transparency_record_capability: specialist.algorithmic_transparency_record_capability, - algorithmic_transparency_record_date_published: specialist.algorithmic_transparency_record_date_published, - algorithmic_transparency_record_function: specialist.algorithmic_transparency_record_function, - algorithmic_transparency_record_organisation: specialist.algorithmic_transparency_record_organisation, - algorithmic_transparency_record_organisation_type: specialist.algorithmic_transparency_record_organisation_type, - algorithmic_transparency_record_other_tags: specialist.algorithmic_transparency_record_other_tags, - algorithmic_transparency_record_phase: specialist.algorithmic_transparency_record_phase, - 
algorithmic_transparency_record_region: specialist.algorithmic_transparency_record_region, - algorithmic_transparency_record_task: specialist.algorithmic_transparency_record_task, - areas_of_interest: specialist.areas_of_interest, - assessment_date: specialist.assessment_date, - assurance_technique_approach: specialist.assurance_technique_approach, attachments: common_fields.attachments, - authors: specialist.authors, - business_sizes: specialist.business_sizes, - business_stages: specialist.business_stages, - case_state: specialist.case_state, - case_type: specialist.case_type, - category: specialist.category, - certificate_status: specialist.certificate_status, - class_category: specialist.class_category, - closed_date: specialist.closed_date, - closing_date: specialist.closing_date, - commodity_type: specialist.commodity_type, contact_groups: details.contact_groups, content_id: common_fields.content_id, content_purpose_subgroup: common_fields.content_purpose_subgroup, content_purpose_supergroup: common_fields.content_purpose_supergroup, content_store_document_type: common_fields.content_store_document_type, - continuation_link: specialist.continuation_link, - country: specialist.country, - country_of_origin: specialist.country_of_origin, - date_application: specialist.date_application, - date_of_completion: specialist.date_of_completion, - date_of_occurrence: specialist.date_of_occurrence, - date_of_start: specialist.date_of_start, - date_registration: specialist.date_registration, - date_registration_eu: specialist.date_registration_eu, - decision_subject: specialist.decision_subject, description: common_fields.description, - destination_country: specialist.destination_country, - development_sector: specialist.development_sector, - digital_market_research_area: specialist.digital_market_research_area, - digital_market_research_category: specialist.digital_market_research_category, - digital_market_research_publish_date: 
specialist.digital_market_research_publish_date, - digital_market_research_publisher: specialist.digital_market_research_publisher, - digital_market_research_topic: specialist.digital_market_research_topic, - disease_case_closed_date: specialist.disease_case_closed_date, - disease_case_opened_date: specialist.disease_case_opened_date, - disease_type: specialist.disease_type, document_type: type, - eligible_entities: specialist.eligible_entities, email_document_supertype: common_fields.email_document_supertype, - first_published_at: specialist.first_published_at, - flood_and_coastal_erosion_category: specialist.flood_and_coastal_erosion_category, format: common_fields.format, - fund_state: specialist.fund_state, - fund_type: specialist.fund_type, - funding_amount: specialist.funding_amount, - funding_source: specialist.funding_source, government_document_supertype: common_fields.government_document_supertype, government_name: common_fields.government_name, - grant_type: specialist.grant_type, - hidden_indexable_content: specialist.hidden_indexable_content, hmrc_manual_section_id: common_fields.section_id, image_url:, indexable_content: indexable.indexable_content, - industries: specialist.industries, - internal_notes: specialist.internal_notes, is_historic: common_fields.historic?, is_political: common_fields.political?, is_withdrawn: common_fields.withdrawn?, - issued_date: specialist.issued_date, - keyword: specialist.keyword, - key_function: specialist.key_function, - laid_date: specialist.laid_date, - land_use: specialist.land_use, - land_types: specialist.land_types, latest_change_note: details.latest_change_note, licence_identifier: details.licence_identifier, - licence_transaction_continuation_link: specialist.licence_transaction_continuation_link, - licence_transaction_industry: specialist.licence_transaction_industry, - licence_transaction_licence_identifier: specialist.licence_transaction_licence_identifier, - licence_transaction_location: 
specialist.licence_transaction_location, - licence_transaction_will_continue_on: specialist.licence_transaction_will_continue_on, licence_short_description: details.licence_short_description, - life_saving_maritime_appliance_service_station_regions: specialist.life_saving_maritime_appliance_service_station_regions, - life_saving_maritime_appliance_type: specialist.life_saving_maritime_appliance_type, - life_saving_maritime_appliance_manufacturer: specialist.life_saving_maritime_appliance_manufacturer, link: common_fields.link, - location: specialist.location, mainstream_browse_page_content_ids: expanded_links.mainstream_browse_page_content_ids, mainstream_browse_pages: expanded_links.mainstream_browse_pages, manual: details.parent_manual, - marine_notice_topic: specialist.marine_notice_topic, - marine_notice_type: specialist.marine_notice_type, - marine_notice_vessel_type: specialist.marine_notice_vessel_type, - market_sector: specialist.market_sector, - medical_specialism: specialist.medical_specialism, - opened_date: specialist.opened_date, organisation_content_ids: expanded_links.organisation_content_ids, organisations: expanded_links.organisations, - outcome_type: specialist.outcome_type, part_of_taxonomy_tree: expanded_links.part_of_taxonomy_tree, parts: parts.presented_parts, - payment_types: specialist.payment_types, people: expanded_links.people, policy_groups: expanded_links.policy_groups, popularity: common_fields.popularity, popularity_b: common_fields.popularity_b, primary_publishing_organisation: expanded_links.primary_publishing_organisation, - principle: specialist.principle, - product_alert_type: specialist.product_alert_type, - product_category: specialist.product_category, - product_measure_type: specialist.product_measure_type, - product_recall_alert_date: specialist.product_recall_alert_date, - product_risk_level: specialist.product_risk_level, - project_code: specialist.project_code, - project_status: specialist.project_status, - 
protection_type: specialist.protection_type, public_timestamp: common_fields.public_timestamp, publishing_app: common_fields.publishing_app, - railway_type: specialist.railway_type, - reason_for_protection: specialist.reason_for_protection, - reference_number: specialist.reference_number, - regions: specialist.regions, - register: specialist.register, - registered_name: specialist.registered_name, - registration: specialist.registration, rendering_app: common_fields.rendering_app, - report_type: specialist.report_type, - research_document_type: specialist.research_document_type, - result: specialist.result, - review_status: specialist.review_status, role_appointments: expanded_links.role_appointments, roles: expanded_links.roles, - sector: specialist.sector, - service_provider: specialist.service_provider, - sift_end_date: specialist.sift_end_date, - sifting_status: specialist.sifting_status, slug:, - stage: specialist.stage, - status: specialist.status, - subject: specialist.subject, taxons: expanded_links.taxons, - theme: specialist.theme, - therapeutic_area: specialist.therapeutic_area, - tiers_or_standalone_items: specialist.tiers_or_standalone_items, - time_registration: specialist.time_registration, title: common_fields.title, topical_events: expanded_links.topical_events, - topics: specialist.topics, - traditional_term_grapevine_product_category: specialist.traditional_term_grapevine_product_category, - traditional_term_language: specialist.traditional_term_language, - traditional_term_type: specialist.traditional_term_type, - tribunal_decision_categories: specialist.tribunal_decision_categories, - tribunal_decision_category: specialist.tribunal_decision_category, - tribunal_decision_country: specialist.tribunal_decision_country, - tribunal_decision_decision_date: specialist.tribunal_decision_decision_date, - tribunal_decision_judges: specialist.tribunal_decision_judges, - tribunal_decision_landmark: specialist.tribunal_decision_landmark, - 
tribunal_decision_reference_number: specialist.tribunal_decision_reference_number, - tribunal_decision_sub_categories: specialist.tribunal_decision_sub_categories, - tribunal_decision_sub_category: specialist.tribunal_decision_sub_category, - types_of_support: specialist.types_of_support, updated_at: common_fields.updated_at, - use_case: specialist.use_case, user_journey_document_supertype: common_fields.user_journey_document_supertype, - value_of_funding: specialist.value_of_funding, - vessel_type: specialist.vessel_type, - veterans_support_organisation_health_and_social_care: specialist.veterans_support_organisation_health_and_social_care, - veterans_support_organisation_finance: specialist.veterans_support_organisation_finance, - veterans_support_organisation_legal_and_justice: specialist.veterans_support_organisation_legal_and_justice, - veterans_support_organisation_employment_education_and_training: specialist.veterans_support_organisation_employment_education_and_training, - veterans_support_organisation_housing: specialist.veterans_support_organisation_housing, - veterans_support_organisation_families_and_children: specialist.veterans_support_organisation_families_and_children, - veterans_support_organisation_community_and_social: specialist.veterans_support_organisation_community_and_social, - veterans_support_organisation_region_england: specialist.veterans_support_organisation_region_england, - veterans_support_organisation_region_northern_ireland: specialist.veterans_support_organisation_region_northern_ireland, - veterans_support_organisation_region_scotland: specialist.veterans_support_organisation_region_scotland, - veterans_support_organisation_region_wales: specialist.veterans_support_organisation_region_wales, view_count: common_fields.view_count, - virus_strain: specialist.virus_strain, - will_continue_on: specialist.will_continue_on, - withdrawn_date: specialist.withdrawn_date, world_locations: expanded_links.world_locations, - year_adopted: 
specialist.year_adopted, - zone_restriction: specialist.zone_restriction, - zone_type: specialist.zone_type, }.reject { |_, v| v.nil? } end @@ -283,10 +136,6 @@ def expanded_links @expanded_links ||= ExpandedLinksPresenter.new(payload["expanded_links"]) end - def specialist - @specialist ||= SpecialistPresenter.new(payload) - end - def newslike? return false if common_fields.content_store_document_type == "fatality_notice" diff --git a/lib/index/elasticsearch_processor.rb b/lib/index/elasticsearch_processor.rb index 0183231ba..e0ef36f72 100644 --- a/lib/index/elasticsearch_processor.rb +++ b/lib/index/elasticsearch_processor.rb @@ -8,6 +8,10 @@ def self.govuk new(client: GovukIndex::Client) end + def self.specialist_finder + new(client: SpecialistFinderIndex::Client) + end + def initialize(client:) @client = client @actions = [] diff --git a/lib/rummager.rb b/lib/rummager.rb index 9f347fef3..a62d52955 100644 --- a/lib/rummager.rb +++ b/lib/rummager.rb @@ -87,7 +87,9 @@ require "govuk_index/updater" require "govuk_index/client" +require "specialist_finder_index/client" require "govuk_index/document_type_mapper" +require "specialist_finder_index/document_type_mapper" require "govuk_index/page_traffic_job" require "govuk_index/method_builder" require "govuk_index/indexable_content_sanitiser" @@ -101,12 +103,15 @@ require "govuk_index/presenters/elasticsearch_identity" require "govuk_index/presenters/elasticsearch_delete_presenter" require "govuk_index/presenters/elasticsearch_presenter" +require "specialist_finder_index/presenters/elasticsearch_presenter" require "govuk_index/presenters/expanded_links_presenter" require "govuk_index/presenters/indexable_content_presenter" require "govuk_index/presenters/parts_presenter" -require "govuk_index/presenters/specialist_presenter" +require "specialist_finder_index/presenters/specialist_presenter" require "govuk_index/publishing_event_processor" +require "specialist_finder_index/publishing_event_processor" require 
"govuk_index/publishing_event_job" +require "specialist_finder_index/publishing_event_job" require "govuk_index/supertype_updater" require "govuk_index/supertype_job" require "govuk_message_queue_consumer" diff --git a/lib/rummager/app.rb b/lib/rummager/app.rb index f3cbecf82..1b04dfb01 100644 --- a/lib/rummager/app.rb +++ b/lib/rummager/app.rb @@ -66,7 +66,7 @@ def require_authentication(permission) end def prevent_access_to_govuk - if index_name == "govuk" + if %w[govuk specialist-finder].include?(index_name) halt(403, "Actions to govuk index are not allowed via this endpoint, please use the message queue to update this index") end end diff --git a/lib/search_config.rb b/lib/search_config.rb index d2828663d..ebe2e65f4 100644 --- a/lib/search_config.rb +++ b/lib/search_config.rb @@ -10,6 +10,7 @@ class << self content_index_names spelling_index_names govuk_index_name + specialist_finder_index_name page_traffic_index_name ].each do |config_method| define_method config_method do diff --git a/lib/specialist_finder_index/client.rb b/lib/specialist_finder_index/client.rb new file mode 100644 index 000000000..af2df0464 --- /dev/null +++ b/lib/specialist_finder_index/client.rb @@ -0,0 +1,11 @@ +module SpecialistFinderIndex + class Client < Index::Client + private + + def index_name + # rubocop:disable Naming/MemoizedInstanceVariableName + @_index ||= SearchConfig.specialist_finder_index_name + # rubocop:enable Naming/MemoizedInstanceVariableName + end + end +end diff --git a/lib/specialist_finder_index/document_type_mapper.rb b/lib/specialist_finder_index/document_type_mapper.rb new file mode 100644 index 000000000..98ea3cd71 --- /dev/null +++ b/lib/specialist_finder_index/document_type_mapper.rb @@ -0,0 +1,17 @@ +module SpecialistFinderIndex + class DocumentTypeMapper + UNPUBLISHING_TYPES = %w[gone redirect substitute vanish].freeze + + def initialize(payload) + @payload = payload + end + + def type + @payload["document_type"] + end + + def unpublishing_type? 
+ UNPUBLISHING_TYPES.include?(@payload["document_type"]) + end + end +end diff --git a/lib/specialist_finder_index/presenters/elasticsearch_presenter.rb b/lib/specialist_finder_index/presenters/elasticsearch_presenter.rb new file mode 100644 index 000000000..002626afc --- /dev/null +++ b/lib/specialist_finder_index/presenters/elasticsearch_presenter.rb @@ -0,0 +1,277 @@ +module SpecialistFinderIndex + class ElasticsearchPresenter + include GovukIndex::ElasticsearchIdentity + + def initialize(payload:, type_mapper:) + @payload = payload + @inferred_type = type_mapper + end + + def type + @type ||= @inferred_type.type + end + + def document + { + attachments: common_fields.attachments, + contact_groups: details.contact_groups, + content_id: common_fields.content_id, + content_purpose_subgroup: common_fields.content_purpose_subgroup, + content_purpose_supergroup: common_fields.content_purpose_supergroup, + content_store_document_type: common_fields.content_store_document_type, + description: common_fields.description, + document_type: type, + email_document_supertype: common_fields.email_document_supertype, + format: common_fields.format, + government_document_supertype: common_fields.government_document_supertype, + government_name: common_fields.government_name, + hmrc_manual_section_id: common_fields.section_id, + image_url: details.image_url, + indexable_content: indexable.indexable_content, + is_historic: common_fields.historic?, + is_political: common_fields.political?, + is_withdrawn: common_fields.withdrawn?, + latest_change_note: details.latest_change_note, + licence_identifier: details.licence_identifier, + licence_short_description: details.licence_short_description, + link: common_fields.link, + mainstream_browse_page_content_ids: expanded_links.mainstream_browse_page_content_ids, + mainstream_browse_pages: expanded_links.mainstream_browse_pages, + manual: details.parent_manual, + organisation_content_ids: expanded_links.organisation_content_ids, + 
organisations: expanded_links.organisations, + part_of_taxonomy_tree: expanded_links.part_of_taxonomy_tree, + parts: parts.presented_parts, + people: expanded_links.people, + policy_groups: expanded_links.policy_groups, + popularity: common_fields.popularity, + popularity_b: common_fields.popularity_b, + primary_publishing_organisation: expanded_links.primary_publishing_organisation, + public_timestamp: common_fields.public_timestamp, + publishing_app: common_fields.publishing_app, + rendering_app: common_fields.rendering_app, + role_appointments: expanded_links.role_appointments, + roles: expanded_links.roles, + taxons: expanded_links.taxons, + title: common_fields.title, + topical_events: expanded_links.topical_events, + updated_at: common_fields.updated_at, + user_journey_document_supertype: common_fields.user_journey_document_supertype, + view_count: common_fields.view_count, + world_locations: expanded_links.world_locations, + }.merge(specialist_fields).reject { |_, v| v.nil? } + end + + def specialist_fields + { + ai_assurance_technique: specialist.ai_assurance_technique, + aircraft_category: specialist.aircraft_category, + aircraft_type: specialist.aircraft_type, + alert_type: specialist.alert_type, + algorithmic_transparency_record_atrs_version: specialist.algorithmic_transparency_record_atrs_version, + algorithmic_transparency_record_capability: specialist.algorithmic_transparency_record_capability, + algorithmic_transparency_record_date_published: specialist.algorithmic_transparency_record_date_published, + algorithmic_transparency_record_function: specialist.algorithmic_transparency_record_function, + algorithmic_transparency_record_organisation: specialist.algorithmic_transparency_record_organisation, + algorithmic_transparency_record_organisation_type: specialist.algorithmic_transparency_record_organisation_type, + algorithmic_transparency_record_other_tags: specialist.algorithmic_transparency_record_other_tags, + algorithmic_transparency_record_phase: 
specialist.algorithmic_transparency_record_phase, + algorithmic_transparency_record_region: specialist.algorithmic_transparency_record_region, + algorithmic_transparency_record_task: specialist.algorithmic_transparency_record_task, + areas_of_interest: specialist.areas_of_interest, + assessment_date: specialist.assessment_date, + assurance_technique_approach: specialist.assurance_technique_approach, + authors: specialist.authors, + business_sizes: specialist.business_sizes, + business_stages: specialist.business_stages, + case_state: specialist.case_state, + case_type: specialist.case_type, + category: specialist.category, + certificate_status: specialist.certificate_status, + class_category: specialist.class_category, + closed_date: specialist.closed_date, + closing_date: specialist.closing_date, + commodity_type: specialist.commodity_type, + continuation_link: specialist.continuation_link, + country: specialist.country, + country_of_origin: specialist.country_of_origin, + date_application: specialist.date_application, + date_of_completion: specialist.date_of_completion, + date_of_occurrence: specialist.date_of_occurrence, + date_of_start: specialist.date_of_start, + date_registration: specialist.date_registration, + date_registration_eu: specialist.date_registration_eu, + decision_subject: specialist.decision_subject, + destination_country: specialist.destination_country, + development_sector: specialist.development_sector, + digital_market_research_area: specialist.digital_market_research_area, + digital_market_research_category: specialist.digital_market_research_category, + digital_market_research_publish_date: specialist.digital_market_research_publish_date, + digital_market_research_publisher: specialist.digital_market_research_publisher, + digital_market_research_topic: specialist.digital_market_research_topic, + disease_case_closed_date: specialist.disease_case_closed_date, + disease_case_opened_date: specialist.disease_case_opened_date, + disease_type: 
specialist.disease_type, + eligible_entities: specialist.eligible_entities, + first_published_at: specialist.first_published_at, + flood_and_coastal_erosion_category: specialist.flood_and_coastal_erosion_category, + fund_state: specialist.fund_state, + fund_type: specialist.fund_type, + funding_amount: specialist.funding_amount, + funding_source: specialist.funding_source, + grant_type: specialist.grant_type, + hidden_indexable_content: specialist.hidden_indexable_content, + industries: specialist.industries, + internal_notes: specialist.internal_notes, + issued_date: specialist.issued_date, + keyword: specialist.keyword, + key_function: specialist.key_function, + laid_date: specialist.laid_date, + land_use: specialist.land_use, + land_types: specialist.land_types, + licence_transaction_continuation_link: specialist.licence_transaction_continuation_link, + licence_transaction_industry: specialist.licence_transaction_industry, + licence_transaction_licence_identifier: specialist.licence_transaction_licence_identifier, + licence_transaction_location: specialist.licence_transaction_location, + licence_transaction_will_continue_on: specialist.licence_transaction_will_continue_on, + life_saving_maritime_appliance_service_station_regions: specialist.life_saving_maritime_appliance_service_station_regions, + life_saving_maritime_appliance_type: specialist.life_saving_maritime_appliance_type, + life_saving_maritime_appliance_manufacturer: specialist.life_saving_maritime_appliance_manufacturer, + location: specialist.location, + marine_notice_topic: specialist.marine_notice_topic, + marine_notice_type: specialist.marine_notice_type, + marine_notice_vessel_type: specialist.marine_notice_vessel_type, + market_sector: specialist.market_sector, + medical_specialism: specialist.medical_specialism, + opened_date: specialist.opened_date, + outcome_type: specialist.outcome_type, + payment_types: specialist.payment_types, + principle: specialist.principle, + product_alert_type: 
specialist.product_alert_type, + product_category: specialist.product_category, + product_measure_type: specialist.product_measure_type, + product_recall_alert_date: specialist.product_recall_alert_date, + product_risk_level: specialist.product_risk_level, + project_code: specialist.project_code, + project_status: specialist.project_status, + protection_type: specialist.protection_type, + railway_type: specialist.railway_type, + reason_for_protection: specialist.reason_for_protection, + reference_number: specialist.reference_number, + regions: specialist.regions, + register: specialist.register, + registered_name: specialist.registered_name, + registration: specialist.registration, + report_type: specialist.report_type, + research_document_type: specialist.research_document_type, + result: specialist.result, + review_status: specialist.review_status, + sector: specialist.sector, + service_provider: specialist.service_provider, + sift_end_date: specialist.sift_end_date, + sifting_status: specialist.sifting_status, + stage: specialist.stage, + status: specialist.status, + subject: specialist.subject, + theme: specialist.theme, + therapeutic_area: specialist.therapeutic_area, + tiers_or_standalone_items: specialist.tiers_or_standalone_items, + time_registration: specialist.time_registration, + topics: specialist.topics, + traditional_term_grapevine_product_category: specialist.traditional_term_grapevine_product_category, + traditional_term_language: specialist.traditional_term_language, + traditional_term_type: specialist.traditional_term_type, + tribunal_decision_categories: specialist.tribunal_decision_categories, + tribunal_decision_category: specialist.tribunal_decision_category, + tribunal_decision_country: specialist.tribunal_decision_country, + tribunal_decision_decision_date: specialist.tribunal_decision_decision_date, + tribunal_decision_judges: specialist.tribunal_decision_judges, + tribunal_decision_landmark: specialist.tribunal_decision_landmark, + 
tribunal_decision_reference_number: specialist.tribunal_decision_reference_number, + tribunal_decision_sub_categories: specialist.tribunal_decision_sub_categories, + tribunal_decision_sub_category: specialist.tribunal_decision_sub_category, + types_of_support: specialist.types_of_support, + use_case: specialist.use_case, + value_of_funding: specialist.value_of_funding, + vessel_type: specialist.vessel_type, + veterans_support_organisation_health_and_social_care: specialist.veterans_support_organisation_health_and_social_care, + veterans_support_organisation_finance: specialist.veterans_support_organisation_finance, + veterans_support_organisation_legal_and_justice: specialist.veterans_support_organisation_legal_and_justice, + veterans_support_organisation_employment_education_and_training: specialist.veterans_support_organisation_employment_education_and_training, + veterans_support_organisation_housing: specialist.veterans_support_organisation_housing, + veterans_support_organisation_families_and_children: specialist.veterans_support_organisation_families_and_children, + veterans_support_organisation_community_and_social: specialist.veterans_support_organisation_community_and_social, + veterans_support_organisation_region_england: specialist.veterans_support_organisation_region_england, + veterans_support_organisation_region_northern_ireland: specialist.veterans_support_organisation_region_northern_ireland, + veterans_support_organisation_region_scotland: specialist.veterans_support_organisation_region_scotland, + veterans_support_organisation_region_wales: specialist.veterans_support_organisation_region_wales, + virus_strain: specialist.virus_strain, + will_continue_on: specialist.will_continue_on, + withdrawn_date: specialist.withdrawn_date, + year_adopted: specialist.year_adopted, + zone_restriction: specialist.zone_restriction, + zone_type: specialist.zone_type, + } + end + + def updated_at + common_fields.updated_at + end + + def format + common_fields.format 
+ end + + def base_path + common_fields.base_path + end + + def link + common_fields.link + end + + def publishing_app + common_fields.publishing_app + end + + def valid! + if format == "recommended-link" + details.url || raise(MissingExternalUrl, "url missing from details section") + else + base_path || raise(NotIdentifiable, "base_path missing from payload") + end + end + + private + + attr_reader :payload + + def indexable + GovukIndex::IndexableContentPresenter.new( + format: common_fields.format, + details: payload["details"], + sanitiser: GovukIndex::IndexableContentSanitiser.new, + ) + end + + def common_fields + @common_fields ||= GovukIndex::CommonFieldsPresenter.new(payload) + end + + def details + @details ||= GovukIndex::DetailsPresenter.new(details: payload["details"], format: common_fields.format) + end + + def parts + @parts ||= GovukIndex::PartsPresenter.new(parts: payload["details"].fetch("parts", [])) + end + + def expanded_links + @expanded_links ||= GovukIndex::ExpandedLinksPresenter.new(payload["expanded_links"]) + end + + def specialist + @specialist ||= SpecialistPresenter.new(payload) + end + end +end diff --git a/lib/govuk_index/presenters/specialist_presenter.rb b/lib/specialist_finder_index/presenters/specialist_presenter.rb similarity index 99% rename from lib/govuk_index/presenters/specialist_presenter.rb rename to lib/specialist_finder_index/presenters/specialist_presenter.rb index 415a1411a..b5d6b0ed9 100644 --- a/lib/govuk_index/presenters/specialist_presenter.rb +++ b/lib/specialist_finder_index/presenters/specialist_presenter.rb @@ -1,6 +1,6 @@ -module GovukIndex +module SpecialistFinderIndex class SpecialistPresenter - extend MethodBuilder + extend GovukIndex::MethodBuilder set_payload_method :metadata diff --git a/lib/specialist_finder_index/publishing_event_job.rb b/lib/specialist_finder_index/publishing_event_job.rb new file mode 100644 index 000000000..0e9c40e3b --- /dev/null +++ 
b/lib/specialist_finder_index/publishing_event_job.rb @@ -0,0 +1,112 @@ +module SpecialistFinderIndex + class ElasticsearchRetryError < StandardError; end + + class ElasticsearchInvalidResponseItemCount < StandardError; end + + class MissingTextHtmlContentType < StandardError; end + + class MultipleMessagesInElasticsearchResponse < StandardError; end + + class NotFoundError < StandardError; end + + class UnknownDocumentTypeError < StandardError; end + + class NotIdentifiable < StandardError; end + + class MissingExternalUrl < StandardError; end + + class PublishingEventJob < BaseJob + notify_of_failures + + def perform(messages) + processor = Index::ElasticsearchProcessor.specialist_finder + + messages.each do |routing_key, payload| + process_action(processor, routing_key, payload) + end + + responses = processor.commit + + (responses || []).each do |response| + process_response(response, messages) + end + # Rescuing exception to guarantee we capture all Sidekiq retries + rescue Exception # rubocop:disable Lint/RescueException + Services.statsd_client.increment("specialist_finder_index.sidekiq-retry") + raise + end + + private + + NON_INDEXED_PAGES = %w[ + finder_email_signup + ] + + def process_action(processor, routing_key, payload) + logger.debug("Processing #{routing_key}: #{payload}") + Services.statsd_client.increment("specialist_finder_index.sidekiq-consumed") + + type_mapper = DocumentTypeMapper.new(payload) + + presenter = if type_mapper.unpublishing_type? + GovukIndex::ElasticsearchDeletePresenter.new(payload:) + else + ElasticsearchPresenter.new( + payload: GovukIndex::PayloadPreparer.new(payload).prepare, + type_mapper:, + ) + end + + presenter.valid! + + identifier = "#{presenter.link} #{presenter.type || "'unmapped type'"}" + + if NON_INDEXED_PAGES.include? type_mapper.type + logger.info("#{routing_key} -> IGNORE #{identifier}") + elsif type_mapper.unpublishing_type?
+ logger.info("#{routing_key} -> DELETE #{identifier}") + processor.delete(presenter) + else + logger.info("#{routing_key} -> INDEX #{identifier}") + processor.save(presenter) + end + + # Rescuing as we don't want to retry this class of error + rescue NotIdentifiable => e + GovukError.notify(e, extra: { message_body: payload }) + # Unpublishing messages for something that does not exist may have been + # processed out of order so we don't want to notify errbit but just allow + # the process to continue + rescue NotFoundError + logger.info("#{payload['base_path']} could not be found.") + Services.statsd_client.increment("specialist_finder_index.not-found-error") + rescue UnknownDocumentTypeError + logger.info("#{payload['document_type']} document type is not known.") + Services.statsd_client.increment("specialist_finder_index.unknown-document-type") + end + + def process_response(response, messages) + messages_with_error = [] + if response["items"].count > 1 + Services.statsd_client.increment("specialist_finder_index.elasticsearch.multiple_responses") + end + + if response["items"].count != messages.count + raise ElasticsearchInvalidResponseItemCount, "received #{response['items'].count} expected #{messages.count}" + end + + response["items"].zip(messages).each do |response_for_message, message| + messages_with_error << message unless Index::ResponseValidator.new(namespace: "specialist_finder_index").valid?(response_for_message) + end + + if messages_with_error.any? + # raise an error so that all messages are retried. + # NOTE: versioned ES actions can be performed multiple times with a consistent result.
+ raise ElasticsearchRetryError.new( + reason: "Elasticsearch failures", + messages: "#{messages_with_error.count} of #{messages.count} failed - see ElasticsearchError's for details", + ) + end + end + end +end diff --git a/lib/specialist_finder_index/publishing_event_processor.rb b/lib/specialist_finder_index/publishing_event_processor.rb new file mode 100644 index 000000000..6e21c26d7 --- /dev/null +++ b/lib/specialist_finder_index/publishing_event_processor.rb @@ -0,0 +1,11 @@ +module SpecialistFinderIndex + class PublishingEventProcessor + def process(messages) + messages = Array(messages) # treat a single message as an array with one value + + Services.statsd_client.increment("specialist_finder_index.rabbit-mq-consumed") + PublishingEventJob.perform_async(messages.map { |msg| [msg.delivery_info[:routing_key], msg.payload] }) + messages.each(&:ack) + end + end +end diff --git a/spec/integration/govuk_index/specialist_formats_spec.rb b/spec/integration/specialist_finder_index/specialist_formats_spec.rb similarity index 78% rename from spec/integration/govuk_index/specialist_formats_spec.rb rename to spec/integration/specialist_finder_index/specialist_formats_spec.rb index e59e63ad7..8738edd0c 100644 --- a/spec/integration/govuk_index/specialist_formats_spec.rb +++ b/spec/integration/specialist_finder_index/specialist_formats_spec.rb @@ -7,7 +7,7 @@ consumer = GovukMessageQueueConsumer::Consumer.new( queue_name: "bigwig.test", - processor: GovukIndex::PublishingEventProcessor.new, + processor: SpecialistFinderIndex::PublishingEventProcessor.new, rabbitmq_connection: bunny_mock, ) @@ -21,11 +21,9 @@ payload: { document_type: "finder" }, ) - allow(GovukIndex::MigratedFormats).to receive(:indexable_formats).and_return("finder" => :all) - @queue.publish(random_example.to_json, content_type: "application/json") - expect_document_is_in_rummager({ "link" => random_example["base_path"] }, index: "govuk_test", type: "edition") + expect_document_is_in_rummager({ "link" => 
random_example["base_path"] }, index: "specialist-finder_test", type: "finder") end it "specialist documents are correctly indexed" do @@ -60,30 +58,27 @@ schema: "specialist_document", payload: { document_type: specialist_document_type }, ) - allow(GovukIndex::MigratedFormats).to receive(:indexable_formats).and_return(specialist_document_type => :all) @queue.publish(random_example.to_json, content_type: "application/json") - expect_document_is_in_rummager({ "link" => random_example["base_path"] }, index: "govuk_test", type: specialist_document_type) + expect_document_is_in_rummager({ "link" => random_example["base_path"] }, index: "specialist-finder_test", type: specialist_document_type) end end it "esi documents are correctly indexed" do publisher_document_type = "esi_fund" - search_document_type = "european_structural_investment_fund" random_example = generate_random_example( schema: "specialist_document", payload: { document_type: publisher_document_type }, ) - allow(GovukIndex::MigratedFormats).to receive(:indexable_formats).and_return(search_document_type => :all) @queue.publish(random_example.to_json, content_type: "application/json") expect_document_is_in_rummager( - { "link" => random_example["base_path"], "format" => search_document_type }, - index: "govuk_test", - type: search_document_type, + { "link" => random_example["base_path"], "format" => publisher_document_type }, + index: "specialist-finder_test", + type: publisher_document_type, ) end @@ -96,7 +91,7 @@ @queue.publish(random_example.to_json, content_type: "application/json") expect { - fetch_document_from_rummager(id: random_example["base_path"], index: "govuk_test") + fetch_document_from_rummager(id: random_example["base_path"], index: "specialist-finder_test") }.to raise_error(Elasticsearch::Transport::Transport::Errors::NotFound) end end diff --git a/spec/support/index_helpers.rb b/spec/support/index_helpers.rb index 35459a3d2..d5657e232 100644 --- a/spec/support/index_helpers.rb +++ 
b/spec/support/index_helpers.rb @@ -13,7 +13,7 @@ def self.all_index_names end def self.clean_all - all_index_names.each do |index_name| + all_index_names.append(SearchConfig.specialist_finder_index_name).each do |index_name| clean_index_group(index_name) end end @@ -36,7 +36,7 @@ def self.clean_index_group(index_name) end def self.create_all - all_index_names.each do |index| + all_index_names.append(SearchConfig.specialist_finder_index_name).each do |index| create_test_index(index) end end diff --git a/spec/unit/govuk_index/specialist_formats_spec.rb b/spec/unit/specialist_finder_index/specialist_formats_spec.rb similarity index 98% rename from spec/unit/govuk_index/specialist_formats_spec.rb rename to spec/unit/specialist_finder_index/specialist_formats_spec.rb index 4b9336f19..2d05099df 100644 --- a/spec/unit/govuk_index/specialist_formats_spec.rb +++ b/spec/unit/specialist_finder_index/specialist_formats_spec.rb @@ -1,6 +1,6 @@ require "spec_helper" -RSpec.describe GovukIndex::ElasticsearchPresenter, "Specialist formats" do +RSpec.describe SpecialistFinderIndex::ElasticsearchPresenter, "Specialist formats" do before do allow_any_instance_of(Indexer::PopularityLookup).to receive(:lookup_popularities).and_return({}) end @@ -256,7 +256,7 @@ def build_example_with_metadata(metadata) payload end - type_mapper = GovukIndex::DocumentTypeMapper.new(example) + type_mapper = SpecialistFinderIndex::DocumentTypeMapper.new(example) described_class.new(payload: example, type_mapper:).document end