From 21303b1a66cc3245ef10e97c4436859b8b0f58fb Mon Sep 17 00:00:00 2001 From: starswan Date: Tue, 26 Nov 2024 11:48:53 +0000 Subject: [PATCH] Perform most subscription filtering in memory for speed --- app/jobs/alert_email/base.rb | 53 +++++++++++-- app/models/alert_run.rb | 2 + app/models/subscription.rb | 7 -- app/services/search/vacancy_search.rb | 5 +- spec/factories/subscriptions.rb | 4 +- spec/jobs/send_daily_alert_email_job_spec.rb | 83 +++++++++++--------- spec/rails_helper.rb | 2 +- 7 files changed, 100 insertions(+), 56 deletions(-) diff --git a/app/jobs/alert_email/base.rb b/app/jobs/alert_email/base.rb index b7e3cb9253..c7ce3acb74 100644 --- a/app/jobs/alert_email/base.rb +++ b/app/jobs/alert_email/base.rb @@ -1,19 +1,56 @@ class AlertEmail::Base < ApplicationJob - def perform + MAXIMUM_RESULTS_PER_RUN = 500 + + FILTERS = { + teaching_job_roles: ->(vacancy, value) { (vacancy.job_roles & value).any? }, + support_job_roles: ->(vacancy, value) { (vacancy.job_roles & value).any? }, + visa_sponsorship_availability: ->(vacancy, value) { value.include? vacancy.visa_sponsorship_available.to_s }, + ect_statuses: ->(vacancy, value) { value.include?(vacancy.ect_status) }, + subjects: ->(vacancy, value) { (vacancy.subjects & value).any? }, + phases: ->(vacancy, value) { (vacancy.phases & value).any? }, + working_patterns: ->(vacancy, value) { (vacancy.working_patterns & value).any? }, + organisation_slug: ->(vacancy, value) { vacancy.organisations.map(&:slug).include?(value) }, + }.freeze + + def perform # rubocop:disable Metrics/AbcSize return if DisableExpensiveJobs.enabled? - subscriptions.find_each do |subscription| - next if subscription.alert_run_today? + # The intent here is that if we don't have keyword or location searches, then this operation can all be done in memory + # really fast (1 week's worth of vacancies is around 2000, so not worth leaving on disk for each of 100k daily subscriptions + default_scope = Vacancy.includes(:organisations).live.order(publish_on: :desc).search_by_filter(from_date: from_date, to_date: Date.current) + + already_run_ids = AlertRun.for_today.map(&:subscription_id) - vacancies = vacancies_for_subscription(subscription) - next if vacancies.blank? + subscriptions.find_each.reject { |sub| already_run_ids.include?(sub.id) }.each do |subscription| + scope = default_scope + criteria = subscription.search_criteria.symbolize_keys + scope, criteria = handle_keywords(scope, criteria) + scope, criteria = handle_location(scope, criteria) - Jobseekers::AlertMailer.alert(subscription.id, vacancies.pluck(:id)).deliver_later + vacancies = scope.select do |vacancy| + criteria.all? { |criterion, value| FILTERS.fetch(criterion).call(vacancy, value) } + end + + Jobseekers::AlertMailer.alert(subscription.id, vacancies.pluck(:id)).deliver_later if vacancies.any? end Sentry.capture_message("#{self.class.name} run successfully", level: :info) end - def vacancies_for_subscription(subscription) - subscription.vacancies_for_range(from_date, Date.current) + private + + def handle_keywords(scope, criteria) + if criteria.key?(:keyword) + [scope.search_by_full_text(criteria[:keyword]), criteria.except(:keyword)] + else + [scope, criteria] + end + end + + def handle_location(scope, criteria) + if criteria.key?(:location) + [scope.search_by_location(criteria[:location], criteria[:radius]), criteria.except(:location, :radius)] + else + [scope, criteria] + end end end diff --git a/app/models/alert_run.rb b/app/models/alert_run.rb index e6b17ebacd..20100e69c3 100644 --- a/app/models/alert_run.rb +++ b/app/models/alert_run.rb @@ -1,4 +1,6 @@ class AlertRun < ApplicationRecord enum :status, { pending: 0, sent: 1 } belongs_to :subscription + + scope :for_today, -> { where(run_on: Date.current) } end diff --git a/app/models/subscription.rb b/app/models/subscription.rb index a72d409cb2..5b9c415075 100644 --- a/app/models/subscription.rb +++ b/app/models/subscription.rb @@ -1,6 +1,4 @@ class Subscription < ApplicationRecord - MAXIMUM_RESULTS_PER_RUN = 500 - enum :frequency, { daily: 0, weekly: 1 } has_many :alert_runs, dependent: :destroy @@ -41,11 +39,6 @@ def unsubscribe update(email: nil, active: false, unsubscribed_at: Time.current) end - def vacancies_for_range(date_from, date_to) - criteria = search_criteria.symbolize_keys.merge(from_date: date_from, to_date: date_to) - Search::VacancySearch.new(criteria).vacancies.limit(MAXIMUM_RESULTS_PER_RUN) - end - def alert_run_today alert_runs.find_by(run_on: Date.current) end diff --git a/app/services/search/vacancy_search.rb b/app/services/search/vacancy_search.rb index e194f1c2ea..dd689ef986 100644 --- a/app/services/search/vacancy_search.rb +++ b/app/services/search/vacancy_search.rb @@ -4,13 +4,14 @@ class Search::VacancySearch attr_reader :search_criteria, :keyword, :location, :radius, :organisation_slug, :sort - def initialize(search_criteria, sort: nil) + def initialize(search_criteria, sort: nil, scope: Vacancy.live) @search_criteria = search_criteria @keyword = search_criteria[:keyword] @location = search_criteria[:location] @radius = search_criteria[:radius] @organisation_slug = search_criteria[:organisation_slug] @sort = sort || Search::VacancySort.new(keyword: keyword, location: location) + @scope = scope end def active_criteria @@ -56,7 +57,7 @@ def total_count def scope sort_by_distance = sort.by == "distance" - scope = Vacancy.live.includes(:organisations) + scope = @scope.includes(:organisations) scope = scope.where(id: organisation.all_vacancies.pluck(:id)) if organisation scope = scope.search_by_location(location, radius, polygon:, sort_by_distance:) if location scope = scope.search_by_filter(search_criteria) if search_criteria.any? diff --git a/spec/factories/subscriptions.rb b/spec/factories/subscriptions.rb index b9e365c54b..a50f17463c 100644 --- a/spec/factories/subscriptions.rb +++ b/spec/factories/subscriptions.rb @@ -45,11 +45,11 @@ end trait :visa_sponsorship_required do - visa_sponsorship_availability { ["true"] } + visa_sponsorship_availability { %w[true] } end trait :ect_suitable do - ect_statuses { ["ect_suitable"] } + ect_statuses { %w[ect_suitable] } end factory :daily_subscription do diff --git a/spec/jobs/send_daily_alert_email_job_spec.rb b/spec/jobs/send_daily_alert_email_job_spec.rb index 43eb411fc3..a960a3d1dd 100644 --- a/spec/jobs/send_daily_alert_email_job_spec.rb +++ b/spec/jobs/send_daily_alert_email_job_spec.rb @@ -3,20 +3,20 @@ RSpec.describe SendDailyAlertEmailJob do subject(:job) { described_class.perform_later } - let(:mail) { double(:mail) } + let(:mail) { double(:mail, deliver_later: nil) } describe "#perform" do context "with vacancies" do before do - create(:vacancy, :published_slugged, contact_number: "1", ect_status: :ect_unsuitable, job_roles: %w[teacher], subjects: %w[English], phases: %w[secondary], working_patterns: %w[full_time], created_at: 1.day.ago + 1.minute) - create(:vacancy, :published_slugged, contact_number: "2", job_roles: %w[it_support], subjects: %w[English], phases: %w[secondary], working_patterns: %w[full_time], created_at: 1.day.ago + 2.minutes) - create(:vacancy, :published_slugged, contact_number: "3", visa_sponsorship_available: true, job_roles: %w[headteacher], subjects: %w[English], phases: %w[secondary], working_patterns: %w[full_time], created_at: 1.day.ago + 3.minutes) - create(:vacancy, :published_slugged, contact_number: "4", ect_status: :ect_suitable, job_roles: %w[headteacher teacher], subjects: %w[English], phases: %w[secondary], working_patterns: %w[full_time], created_at: 1.day.ago + 4.minutes) - create(:vacancy, :published_slugged, contact_number: "5", job_roles: %w[headteacher], subjects: %w[French], phases: %w[secondary], working_patterns: %w[full_time], created_at: 1.day.ago + 5.minutes) - create(:vacancy, :published_slugged, contact_number: "6", job_roles: %w[headteacher], phases: %w[primary], working_patterns: %w[full_time], created_at: 1.day.ago + 6.minutes) - create(:vacancy, :published_slugged, contact_number: "7", job_roles: %w[headteacher], phases: %w[secondary], working_patterns: %w[part_time], created_at: 1.day.ago + 7.minutes) - create(:vacancy, :published_slugged, contact_number: "8", organisations: [new_org], job_roles: %w[headteacher], phases: %w[secondary], working_patterns: %w[full_time], created_at: 1.day.ago + 8.minutes) - create(:vacancy, :published_slugged, contact_number: "9", job_title: "This is a nice job", job_roles: %w[headteacher], phases: %w[secondary], working_patterns: %w[full_time], created_at: 1.day.ago + 9.minutes) + create(:vacancy, :published_slugged, contact_number: "1", ect_status: :ect_unsuitable, job_roles: %w[teacher], subjects: %w[English], phases: %w[secondary], working_patterns: %w[full_time]) + create(:vacancy, :published_slugged, contact_number: "2", job_roles: %w[it_support], subjects: %w[English], phases: %w[secondary], working_patterns: %w[full_time]) + create(:vacancy, :published_slugged, contact_number: "3", visa_sponsorship_available: true, job_roles: %w[headteacher], subjects: %w[English], phases: %w[secondary], working_patterns: %w[full_time]) + create(:vacancy, :published_slugged, contact_number: "4", ect_status: :ect_suitable, job_roles: %w[headteacher teacher], subjects: %w[English], phases: %w[secondary], working_patterns: %w[full_time], publish_on: 1.day.ago) + create(:vacancy, :published_slugged, contact_number: "5", job_roles: %w[headteacher], subjects: %w[French], phases: %w[secondary], working_patterns: %w[full_time]) + create(:vacancy, :published_slugged, contact_number: "6", job_roles: %w[headteacher], phases: %w[primary], working_patterns: %w[full_time]) + create(:vacancy, :published_slugged, contact_number: "7", job_roles: %w[headteacher], phases: %w[secondary], working_patterns: %w[part_time]) + create(:vacancy, :published_slugged, contact_number: "8", organisations: [new_org], job_roles: %w[headteacher], phases: %w[secondary], working_patterns: %w[full_time]) + create(:vacancy, :published_slugged, contact_number: "9", job_title: "This is a Really Nice job", job_roles: %w[headteacher], phases: %w[secondary], working_patterns: %w[full_time]) end let(:new_org) { create(:school) } @@ -30,15 +30,25 @@ let(:new_org_job) { Vacancy.find_by!(contact_number: "8") } let(:nice_job) { Vacancy.find_by!(contact_number: "9") } - let(:vacancies) { [teacher_vacancy, support_vacancy, visa_job, ect_job, french_job, primary_job, part_time_job, new_org_job, nice_job] } - context "with keyword" do - let(:subscription) { create(:subscription, keyword: "nice", frequency: :daily) } + let(:subscription) { create(:subscription, keyword: keyword, frequency: :daily) } - it "only finds the nice job" do - expect(Jobseekers::AlertMailer).to receive(:alert).with(subscription.id, [nice_job].pluck(:id)) { mail } - expect(mail).to receive(:deliver_later) { ActionMailer::MailDeliveryJob.new } - perform_enqueued_jobs { job } + context "with plain keyword" do + let(:keyword) { "nice" } + + it "only finds the nice job" do + expect(Jobseekers::AlertMailer).to receive(:alert).with(subscription.id, [nice_job].pluck(:id)) { mail } + perform_enqueued_jobs { job } + end + end + + context "with keyword caps and trailing space" do + let(:keyword) { "Nice " } + + it "only finds the nice job" do + expect(Jobseekers::AlertMailer).to receive(:alert).with(subscription.id, [nice_job].pluck(:id)) { mail } + perform_enqueued_jobs { job } + end end end @@ -47,7 +57,6 @@ it "only finds the teaching job" do expect(Jobseekers::AlertMailer).to receive(:alert).with(subscription.id, [teacher_vacancy, ect_job].pluck(:id)) { mail } - expect(mail).to receive(:deliver_later) { ActionMailer::MailDeliveryJob.new } perform_enqueued_jobs { job } end end @@ -57,7 +66,6 @@ it "only finds the support job" do expect(Jobseekers::AlertMailer).to receive(:alert).with(subscription.id, [support_vacancy].pluck(:id)) { mail } - expect(mail).to receive(:deliver_later) { ActionMailer::MailDeliveryJob.new } perform_enqueued_jobs { job } end end @@ -67,7 +75,6 @@ it "only finds the visa job" do expect(Jobseekers::AlertMailer).to receive(:alert).with(subscription.id, [visa_job].pluck(:id)) { mail } - expect(mail).to receive(:deliver_later) { ActionMailer::MailDeliveryJob.new } perform_enqueued_jobs { job } end end @@ -77,7 +84,6 @@ it "only finds the ECT job" do expect(Jobseekers::AlertMailer).to receive(:alert).with(subscription.id, [ect_job].pluck(:id)) { mail } - expect(mail).to receive(:deliver_later) { ActionMailer::MailDeliveryJob.new } perform_enqueued_jobs { job } end end @@ -87,7 +93,6 @@ it "only finds the Maths job" do expect(Jobseekers::AlertMailer).to receive(:alert).with(subscription.id, [french_job].pluck(:id)) { mail } - expect(mail).to receive(:deliver_later) { ActionMailer::MailDeliveryJob.new } perform_enqueued_jobs { job } end end @@ -97,7 +102,6 @@ it "only finds the primary school job" do expect(Jobseekers::AlertMailer).to receive(:alert).with(subscription.id, [primary_job].pluck(:id)) { mail } - expect(mail).to receive(:deliver_later) { ActionMailer::MailDeliveryJob.new } perform_enqueued_jobs { job } end end @@ -107,7 +111,6 @@ it "only finds the part_time job" do expect(Jobseekers::AlertMailer).to receive(:alert).with(subscription.id, [part_time_job].pluck(:id)) { mail } - expect(mail).to receive(:deliver_later) { ActionMailer::MailDeliveryJob.new } perform_enqueued_jobs { job } end end @@ -117,28 +120,36 @@ it "only finds the new_publisher job" do expect(Jobseekers::AlertMailer).to receive(:alert).with(subscription.id, [new_org_job].pluck(:id)) { mail } - expect(mail).to receive(:deliver_later) { ActionMailer::MailDeliveryJob.new } perform_enqueued_jobs { job } end end - context "with no subscription criteria" do + context "when a run exists" do let(:subscription) { create(:subscription, frequency: :daily) } - it "sends an email" do - expect(Jobseekers::AlertMailer).to receive(:alert).with(subscription.id, vacancies.pluck(:id)) { mail } - expect(mail).to receive(:deliver_later) { ActionMailer::MailDeliveryJob.new } + before do + subscription.alert_runs.create(run_on: Date.current) + end + + it "does not send another email" do + expect(Jobseekers::AlertMailer).to_not receive(:alert) perform_enqueued_jobs { job } end + end + end - context "when a run exists" do - let!(:run) { subscription.alert_runs.create(run_on: Date.current) } + context "with old and new vacancies" do + before do + create(:vacancy, :published_slugged, contact_number: "2", publish_on: Date.current) + create(:vacancy, :published_slugged, contact_number: "1", publish_on: 1.day.ago) + end - it "does not send another email" do - expect(Jobseekers::AlertMailer).to_not receive(:alert) - perform_enqueued_jobs { job } - end - end + let(:expected_vacancies) { [Vacancy.find_by!(contact_number: "2"), Vacancy.find_by!(contact_number: "1")] } + let(:subscription) { create(:subscription, frequency: :daily) } + + it "sends the vacancies in publish order descending" do + expect(Jobseekers::AlertMailer).to receive(:alert).with(subscription.id, expected_vacancies.pluck(:id)) { mail } + perform_enqueued_jobs { job } end end diff --git a/spec/rails_helper.rb b/spec/rails_helper.rb index 76ae0b2f4b..a1cfb6408d 100644 --- a/spec/rails_helper.rb +++ b/spec/rails_helper.rb @@ -104,7 +104,7 @@ end end - config.before(:each) do + config.before do allow(DisableExpensiveJobs).to receive(:enabled?).and_return(false) end