Skip to content

Commit

Permalink
DEV-1086: move notify.rb to cli
Browse files Browse the repository at this point in the history
  • Loading branch information
aelkiss committed Apr 30, 2024
1 parent 9f01055 commit 13735ed
Show file tree
Hide file tree
Showing 8 changed files with 148 additions and 78 deletions.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ gem "thor"
gem "rake", "~> 12.3"
gem "csv"
gem "base64"
gem "net-smtp"

group :development, :test do
gem "rspec", "~> 3.0"
Expand Down
6 changes: 6 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ GEM
lint_roller (1.1.0)
method_source (1.1.0)
mysql2 (0.5.6)
net-protocol (0.2.2)
timeout
net-smtp (0.5.0)
net-protocol
nio4r (2.7.1)
pairtree (0.3.0)
parallel (1.24.0)
Expand Down Expand Up @@ -91,6 +95,7 @@ GEM
rubocop-performance (~> 1.20.2)
thor (1.3.1)
timecop (0.9.8)
timeout (0.4.1)
unicode-display_width (2.5.0)

PLATFORMS
Expand All @@ -101,6 +106,7 @@ DEPENDENCIES
byebug
csv
mysql2
net-smtp
pairtree (~> 0.3)
pry
puma
Expand Down
69 changes: 0 additions & 69 deletions bin/notify.rb

This file was deleted.

1 change: 1 addition & 0 deletions lib/datasets.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
require "datasets/force_volume_creator"
require "datasets/htid_safe_run"
require "datasets/managed_safe_run"
require "datasets/notify"
require "datasets/pairtree_path_resolver"
require "datasets/path_resolver"
require "datasets/report"
Expand Down
36 changes: 27 additions & 9 deletions lib/datasets/cli.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,8 @@

module Datasets
class CLI < Thor
def self.option_config
option :config,
type: :string,
default: "#{APP_ROOT}/config/config.yml",
aliases: "-c",
desc: "Path to the configuration file to use."
def self.debugging
true
end

def self.exit_on_failure?
Expand All @@ -30,8 +26,32 @@ def self.exit_on_failure?

APP_ROOT = Pathname.new(__FILE__).expand_path.parent.parent.dirname

# Global option
class_option :config,
type: :string,
default: "#{APP_ROOT}/config/config.yml",
aliases: "-c",
desc: "Path to the configuration file to use."

# Tasks
option_config

option :dry_run,
type: :boolean,
default: false,
aliases: "-n",
desc: "Preview email rather than sending"

option :smtp_host,
default: "localhost",
type: :string,
desc: "Host to use for sending email. Defaults to localhost."

desc "notify logfile ...", "Collate and send deletion notifications gathered from logs"
# Thor task: loads the configuration named by the --config option, then
# builds a Notify for the given log files and runs it.
#
# @param files [Array<String>] log file arguments given on the command line
# @return whatever Notify#notify returns
def notify(*files)
  config_path = options[:config]
  Datasets.config = load_config(config_path)

  notifier = Notify.new(
    files,
    dry_run: options[:dry_run],
    smtp_host: options[:smtp_host]
  )
  notifier.notify
end

option :start_time,
type: :string,
Expand Down Expand Up @@ -61,8 +81,6 @@ def all

default_task :all

option_config

desc "force", "Force update of a list of volumes."
def force
Datasets.config = load_config(options[:config])
Expand Down
84 changes: 84 additions & 0 deletions lib/datasets/notify.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
require "net/smtp"
require "datasets/dedupe_delete_log"

# One row per dataset subset:
#   [full subset name (used in the email subject and body),
#    local part of the recipient address (the domain is appended by the sender),
#    short subset name (used to look up that subset's compiled deletes)]
# Frozen so the table cannot be mutated at runtime.
DATASET_EMAILS = [
  ["ht_text_pd", "dataset-pd", "pd"],
  ["ht_text_pd_open_access", "dataset-pd-oa", "pd_open"],
  ["ht_text_pd_world", "dataset-pd-world", "pd_world"],
  ["ht_text_pd_world_open_access", "dataset-pd-world-oa", "pd_world_open"]
].map(&:freeze).freeze

# Address used in the From: header, as the SMTP envelope sender, and as the
# contact address quoted in the message body.
# NOTE(review): the previous value was not a valid mailbox (it contained
# spaces and no "@"); this is presumably the intended HathiTrust support
# address - confirm before deploying.
SUPPORT_EMAIL = "support@hathitrust.org".freeze

module Datasets
  # Compiles delete records from a set of log files and sends (or previews)
  # one notification email per dataset subset listed in DATASET_EMAILS.
  class Notify
    # @param files [Array<String>] log files handed to DedupeDeleteLog
    # @param dry_run [Boolean] when true, print each message instead of sending it
    # @param smtp_host [String] host passed to Net::SMTP.start when sending
    def initialize(files,
      dry_run:,
      smtp_host:)
      @files = files
      @dry_run = dry_run
      @smtp_host = smtp_host
      @delete_logs = Datasets::DedupeDeleteLog.new(files)
    end

    # Compiles the delete logs and emails each subset's identifiers to
    # "<local part>@hathitrust.org". Subsets with no deletes are skipped.
    #
    # @return [Array] DATASET_EMAILS (the result of iterating it)
    def notify
      # compile_results is indexed below by short subset name.
      deletes = delete_logs.compile_results

      DATASET_EMAILS.each do |subset_full_name, mailbox, subset_short_name|
        email(subset_full_name, "#{mailbox}@hathitrust.org", deletes[subset_short_name])
      end
    end

    private

    attr_reader :dry_run, :delete_logs, :smtp_host, :files

    # Builds the message headers and introductory text, ending with the
    # marker line that opens the identifier list.
    #
    # @param set_name [String] full subset name shown in the subject and body
    # @param recipient [String] address placed in the To: header
    # @return [String]
    def email_header(set_name, recipient)
      # The empty line after Subject: is required: it separates the header
      # section from the body (RFC 5322). Without it the greeting would be
      # parsed as a malformed header line.
      <<~DOC
        From: HathiTrust <#{SUPPORT_EMAIL}>
        To: #{recipient}
        Subject: Delete notifications for #{set_name} dataset

        Dear HathiTrust dataset recipient,
        This email is to notify you that volumes in the HathiTrust "#{set_name}" dataset, of which you have downloaded all or a subset of files, no longer meet the criteria for inclusion in the dataset, and you no longer are allowed to use them in your research.
        Please review the data you have synced from HathiTrust to check whether you have the volumes listed below. If so, delete all copies you retain of these volumes in accordance with our terms of use. Alternatively, you may delete your copy of the dataset and re-sync to the updated dataset.
        If you no longer possess HathiTrust datasets, or if you have other questions regarding datasets, then please email #{SUPPORT_EMAIL}.
        Thank you,
        HathiTrust
        ===BEGIN ID LIST===
      DOC
    end

    # Assembles the full message for one subset and hands it to
    # send_or_preview. Does nothing when there are no deletes to report.
    #
    # @param set_name [String] full subset name
    # @param recipient [String] destination address
    # @param data [#count, #map, nil] identifiers to list, one per line
    def email(set_name, recipient, data)
      # 0 is truthy in Ruby, so the emptiness check has to be explicit;
      # otherwise recipients get a notification with an empty ID list.
      return if data.nil? || data.count.zero?

      id_lines = data.map { |item| "#{item}\n" }.join
      message = "#{email_header(set_name, recipient)}#{id_lines}===END ID LIST===\n"

      puts "sending message with #{data.count} deletes to #{recipient}"
      send_or_preview(message, recipient)
    end

    # Prints the message when dry_run is set; otherwise delivers it through
    # the configured SMTP host with SUPPORT_EMAIL as the envelope sender.
    #
    # NOTE(review): the preview line lists SUPPORT_EMAIL as a recipient, but
    # the real send delivers only to `recipient` - confirm which is intended.
    def send_or_preview(message, recipient)
      if dry_run
        puts "To: #{SUPPORT_EMAIL}, #{recipient}"
        puts
        puts message
      else
        Net::SMTP.start(smtp_host) do |smtp|
          smtp.send_message message, SUPPORT_EMAIL, recipient
        end
      end
    end
  end
end
10 changes: 10 additions & 0 deletions spec/cli_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,16 @@ module Datasets
$stdin = old_stdin
end
end

# The "notify" subcommand should construct a Notify from the positional log
# file arguments plus the parsed options, and call #notify on it exactly once.
it "runs notification processes when given the notify flag" do
files = ["deletelog1", "deletelog2"]
notification = double("notification")
# Stub the constructor so the real Notify is never instantiated.
allow(Notify).to receive(:new).and_return(notification)
# --dry-run is passed on the command line below; --smtp-host is not, so the
# option's declared default ("localhost") is what the constructor should see.
expect(Notify).to receive(:new).with(files, dry_run: true, smtp_host: "localhost").once
expect(notification).to receive(:notify).once

described_class.start(["notify", "--dry-run", "deletelog1", "deletelog2"])
end
end
end
end
19 changes: 19 additions & 0 deletions spec/notify_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
require_relative "spec_helper"
require "notify"

module Datasets
# Exercises Notify in dry-run mode against a temporary delete log and checks
# the text it prints.
RSpec.describe Notify do
# needs the dataset paths there
include_context "integration" do
it "outputs an email with deletes" do
Tempfile.create("dedupe-deletes") do |f|
# Two tab-separated records, both under the "pd" subset key.
f.puts("pd\ttest.id1", "pd\ttest.id2")
f.close

# dry_run: true makes Notify print the message rather than open an SMTP
# connection, so the placeholder host is never contacted.
notifier = Notify.new([f.path], dry_run: true, smtp_host: "default.invalid")
# /m lets ".*" span newlines: the subject text must appear before both ids,
# and the ids must appear in this order.
expect { notifier.notify }.to output(/Delete notification.*test\.id1.*test\.id2/m).to_stdout
end
end
end
end
end

0 comments on commit 13735ed

Please sign in to comment.