From b9eda368c25547383138def261b13a4aea0f9644 Mon Sep 17 00:00:00 2001 From: David Campbell <102170536+davidcam-src@users.noreply.github.com> Date: Fri, 19 Jul 2024 14:25:47 -0400 Subject: [PATCH] HYC-1934 - Send download events to database (#1108) * add hycdownloadstat model and migration * update schema after running HycDownloadStat migration * Update DownloadAnalyticsBehavior to push stats to local db * logs, factorybot and logs in create_download_stat * changed phrasing, add new test * test for download tracking within database, rubocop * test to verify download count is incremented for existing table entries * removing repeated mocking in download controller tests and adding a function to return fileset ids * removing some duplicate code, tests for new helper function * specifying uid to avoid conflicts * helper function tests, removing tests with duplicate coverage * pr changes --- .../hyc/download_analytics_behavior.rb | 45 ++++- app/models/hyc_download_stat.rb | 12 ++ ...0240714234200_create_hyc_download_stats.rb | 20 +++ db/schema.rb | 17 +- .../hyrax/downloads_controller_spec.rb | 154 +++++++++++++++--- spec/factories/hyc_download_stats.rb | 12 ++ spec/models/hyc_download_stat_spec.rb | 45 +++++ 7 files changed, 274 insertions(+), 31 deletions(-) create mode 100644 app/models/hyc_download_stat.rb create mode 100644 db/migrate/20240714234200_create_hyc_download_stats.rb create mode 100644 spec/factories/hyc_download_stats.rb create mode 100644 spec/models/hyc_download_stat_spec.rb diff --git a/app/controllers/concerns/hyc/download_analytics_behavior.rb b/app/controllers/concerns/hyc/download_analytics_behavior.rb index 8caab4f19..3d5cde332 100644 --- a/app/controllers/concerns/hyc/download_analytics_behavior.rb +++ b/app/controllers/concerns/hyc/download_analytics_behavior.rb @@ -12,7 +12,7 @@ def track_download return end if Hyrax::Analytics.config.auth_token.present? && !request.url.match('thumbnail') - Rails.logger.debug("Recording download event for #{params[:id]}") + Rails.logger.debug("Recording download event for #{fileset_id}") medium = request.referrer.present? ? 'referral' : 'direct' client_ip = request.remote_ip @@ -34,7 +34,7 @@ def track_download apiv: '1', e_a: 'DownloadIR', e_c: @admin_set_name, - e_n: params[:id] || 'Unknown', + e_n: fileset_id, e_v: medium, _id: client_id, cip: client_ip, @@ -50,18 +50,53 @@ def track_download if response.code >= 300 Rails.logger.error("DownloadAnalyticsBehavior received an error response #{response.code} for matomo query: #{uri}") end + # Send download events to db + create_download_stat Rails.logger.debug("DownloadAnalyticsBehavior request completed #{response.code}") response.code end end + def create_download_stat + record_id_value = record_id + admin_set_id_value = admin_set_id + work_type = fetch_record.dig(0, 'has_model_ssim', 0) + date = Date.today + + Rails.logger.debug('Creating or updating hyc-download-stat database entry with the following attributes:') + Rails.logger.debug("fileset_id: #{fileset_id}, work_id: #{record_id_value}, admin_set_id: #{admin_set_id_value}, work_type: #{work_type}, date: #{date.beginning_of_month}") + + stat = HycDownloadStat.find_or_initialize_by( + fileset_id: fileset_id, + work_id: record_id_value, + admin_set_id: admin_set_id_value, + work_type: work_type, + date: date.beginning_of_month + ) + stat.download_count += 1 + if stat.save + Rails.logger.debug("Database entry for fileset_id: #{fileset_id} successfully saved with download count: #{stat.download_count}.") + else + Rails.logger.error("Failed to update database entry for fileset_id: #{fileset_id}." \ + "Errors: #{stat.errors.full_messages}") + end + end + def bot_request?(user_agent) browser = Browser.new(user_agent) browser.bot? end def fetch_record - @record ||= ActiveFedora::SolrService.get("file_set_ids_ssim:#{params[:id]}", rows: 1)['response']['docs'] + @record ||= ActiveFedora::SolrService.get("file_set_ids_ssim:#{fileset_id}", rows: 1)['response']['docs'] + end + + def fetch_admin_set + @admin_set ||= ActiveFedora::SolrService.get("title_tesim:#{@admin_set_name}", rows: 1)['response']['docs'] + end + + def admin_set_id + @admin_set_id ||= fetch_admin_set.dig(0, 'id') end def record_id @@ -72,6 +107,10 @@ def record_id end end + def fileset_id + @fileset_id ||= params[:id] || 'Unknown' + end + def record_title @record_title ||= if !fetch_record.blank? && fetch_record[0]['title_tesim'] fetch_record[0]['title_tesim'].first diff --git a/app/models/hyc_download_stat.rb b/app/models/hyc_download_stat.rb new file mode 100644 index 000000000..83d23d164 --- /dev/null +++ b/app/models/hyc_download_stat.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true +class HycDownloadStat < ApplicationRecord + scope :with_fileset_id_and_date, ->(fileset_id, start_date, end_date) { where(fileset_id: fileset_id, date: start_date..end_date) } + scope :with_work_id_and_date, ->(work_id, start_date, end_date) { where(work_id: work_id, date: start_date..end_date) } + scope :with_admin_set_id, ->(admin_set_id) { where(admin_set_id: admin_set_id) } + scope :with_work_type, ->(work_type) { where(work_type: work_type) } + + # Additional scopes for flexibility + scope :with_fileset_id, ->(fileset_id) { where(fileset_id: fileset_id) } + scope :with_work_id, ->(work_id) { where(work_id: work_id) } + scope :within_date_range, ->(start_date, end_date) { where(date: start_date..end_date) } + end diff --git a/db/migrate/20240714234200_create_hyc_download_stats.rb b/db/migrate/20240714234200_create_hyc_download_stats.rb new file mode 100644 index 000000000..fda3db13a --- /dev/null +++ b/db/migrate/20240714234200_create_hyc_download_stats.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true +class CreateHycDownloadStats < ActiveRecord::Migration[6.1] + def change + create_table :hyc_download_stats do |t| + t.string :fileset_id, null: false + t.string :work_id, null: false + t.string :admin_set_id, null: false + t.string :work_type, null: false + t.date :date, null: false + t.integer :download_count, default: 0, null: false + + t.timestamps + end + + add_index :hyc_download_stats, [:fileset_id, :date] + add_index :hyc_download_stats, [:work_id, :date] + add_index :hyc_download_stats, :admin_set_id + add_index :hyc_download_stats, :work_type + end +end diff --git a/db/schema.rb b/db/schema.rb index 9319bff32..8b4a49aa8 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2023_06_08_153601) do +ActiveRecord::Schema.define(version: 2024_07_14_234200) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -249,6 +249,21 @@ t.index ["user_id"], name: "index_file_view_stats_on_user_id" end + create_table "hyc_download_stats", force: :cascade do |t| + t.string "fileset_id", null: false + t.string "work_id", null: false + t.string "admin_set_id", null: false + t.string "work_type", null: false + t.date "date", null: false + t.integer "download_count", default: 0, null: false + t.datetime "created_at", precision: 6, null: false + t.datetime "updated_at", precision: 6, null: false + t.index ["admin_set_id"], name: "index_hyc_download_stats_on_admin_set_id" + t.index ["fileset_id", "date"], name: "index_hyc_download_stats_on_fileset_id_and_date" + t.index ["work_id", "date"], name: "index_hyc_download_stats_on_work_id_and_date" + t.index ["work_type"], name: "index_hyc_download_stats_on_work_type" + end + create_table "hyrax_collection_types", id: :serial, force: :cascade do |t| t.string "title" t.text "description" diff --git a/spec/controllers/hyrax/downloads_controller_spec.rb b/spec/controllers/hyrax/downloads_controller_spec.rb index 40b2d52fb..f7deb4c26 100644 --- a/spec/controllers/hyrax/downloads_controller_spec.rb +++ b/spec/controllers/hyrax/downloads_controller_spec.rb @@ -5,6 +5,7 @@ RSpec.describe Hyrax::DownloadsController, type: :controller do routes { Hyrax::Engine.routes } + let(:user) { FactoryBot.create(:user, uid: 'downloads_controller_test_user') } let(:spec_base_analytics_url) { 'https://analytics-qa.lib.unc.edu' } let(:spec_site_id) { '5' } let(:spec_auth_token) { 'testtoken' } @@ -13,6 +14,24 @@ 'idsite' => '5'})) .to_return(status: 200, body: '', headers: {}) end + let(:example_admin_set_id) { 'h128zk07m' } + let(:example_work_id) { '1z40m031g' } + let(:mock_admin_set) { [{ + 'has_model_ssim' => ['AdminSet'], + 'id' => 'h128zk07m', + 'title_tesim' => ['Open_Access_Articles_and_Book_Chapters']} + ] + } + let(:mock_record) { [{ + 'has_model_ssim' => ['Article'], + 'id' => '1z40m031g', + 'title_tesim' => ['Key ethical issues discussed at CDC-sponsored international, regional meetings to explore cultural perspectives and contexts on pandemic influenza preparedness and response'], + 'admin_set_tesim' => ['Open_Access_Articles_and_Book_Chapters']} + ] + } + let(:file_set) do + FactoryBot.create(:file_with_work, user: user, content: File.open("#{fixture_path}/files/image.png")) + end around do |example| # Set the environment variables for the test @@ -32,12 +51,15 @@ before do ActiveFedora::Cleaner.clean! allow(stub_matomo) - allow(Hyrax::Analytics.config).to receive(:site_id).and_return('5') + @user = user + sign_in @user + allow(controller).to receive(:fetch_record).and_return(mock_record) + allow(controller).to receive(:fetch_admin_set).and_return(mock_admin_set) + allow(Hyrax::Analytics.config).to receive(:site_id).and_return(spec_site_id) allow(SecureRandom).to receive(:uuid).and_return('555') allow(Hyrax::VirusCheckerService).to receive(:file_has_virus?) { false } end - # app/controllers/concerns/hyrax/download_analytics_behavior.rb:8 describe '#track_download' do WebMock.after_request do |request_signature, response| Rails.logger.debug("Request #{request_signature} was made and #{response} was returned") @@ -62,21 +84,14 @@ end context 'with a created work' do - let(:user) { FactoryBot.create(:user) } - before { sign_in user } - let(:file_set) do - FactoryBot.create(:file_with_work, user: user, content: File.open("#{fixture_path}/files/image.png")) - end let(:default_image) { ActionController::Base.helpers.image_path 'default.png' } - it 'can use a fake request' do - allow(Hyrax::VirusCheckerService).to receive(:file_has_virus?) { false } - allow(SecureRandom).to receive(:uuid).and_return('555') + it 'sends a download event to analytics tracking platform upon successful download' do request.env['HTTP_REFERER'] = 'http://example.com' stub = stub_request(:get, "#{spec_base_analytics_url}/matomo.php") .with(query: hash_including({'e_a' => 'DownloadIR', 'e_c' => 'Unknown', - # 'e_n' => file_set.id, + 'e_n' => file_set.id, 'e_v' => 'referral', 'urlref' => 'http://example.com', 'url' => "http://test.host/downloads/#{file_set.id}" @@ -86,18 +101,58 @@ expect(stub).to have_been_requested.times(1) # must be after the method call that creates request end + it 'records the download event in the database' do + request.env['HTTP_REFERER'] = 'http://example.com' + + expect { + get :show, params: { id: file_set.id } + }.to change { HycDownloadStat.count }.by(1) + + stat = HycDownloadStat.last + expect(stat.fileset_id).to eq(file_set.id) + expect(stat.work_id).to eq(example_work_id) + expect(stat.admin_set_id).to eq(example_admin_set_id) + expect(stat.date).to eq(Date.today.beginning_of_month) + expect(stat.download_count).to eq(1) + end + + it 'updates the download count if the record already exists' do + existing_download_count = 5 + request.env['HTTP_REFERER'] = 'http://example.com' + + existing_stat = HycDownloadStat.create!( + fileset_id: file_set.id, + work_id: example_work_id, + admin_set_id: example_admin_set_id, + work_type: 'Article', + date: Date.today.beginning_of_month, + download_count: existing_download_count + ) + + expect { + get :show, params: { id: file_set.id } + }.not_to change { HycDownloadStat.count } + + stat = HycDownloadStat.last + expect(stat.fileset_id).to eq(file_set.id) + expect(stat.work_id).to eq(example_work_id) + expect(stat.admin_set_id).to eq(example_admin_set_id) + expect(stat.date).to eq(Date.today.beginning_of_month) + expect(stat.download_count).to eq(existing_download_count + 1) + end + it 'does not track downloads for well known bot user agents' do # Testing with two well known user agents to account for potential changes in bot filtering due to updates in the Browser gem bot_user_agents = ['googlebot', 'bingbot'] bot_user_agents.each do |bot_user_agent| allow(controller.request).to receive(:user_agent).and_return(bot_user_agent) - allow(SecureRandom).to receive(:uuid).and_return('555') request.env['HTTP_REFERER'] = 'http://example.com' request.headers['User-Agent'] = bot_user_agent stub = stub_request(:get, "#{spec_base_analytics_url}/matomo.php") .with(query: hash_including({ 'e_a' => 'DownloadIR', 'e_c' => 'Unknown', + 'e_n' => file_set.id, 'e_v' => 'referral', 'urlref' => 'http://example.com', 'url' => "http://test.host/downloads/#{file_set.id}" @@ -121,7 +176,7 @@ stub = stub_request(:get, "#{spec_base_analytics_url}/matomo.php") .with(query: hash_including({'e_a' => 'DownloadIR', 'e_c' => 'Unknown', - # 'e_n' => file_set.id, + 'e_n' => file_set.id, 'e_v' => 'direct', 'urlref' => nil, 'url' => "http://test.host/downloads/#{file_set.id}" @@ -144,7 +199,6 @@ end end - # app/controllers/hyrax/downloads_controller.rb:6 describe '#set_record_admin_set' do let(:solr_response) { { response: { docs: [{ admin_set_tesim: ['admin set for download controller'] }] } }.to_json } let(:empty_solr_response) { { response: { docs: [] } }.to_json } @@ -171,20 +225,11 @@ end describe '#download_file' do - before do - @user = FactoryBot.create(:user) - sign_in @user - end - context 'with file set for download' do let(:file_set) do FactoryBot.create(:file_with_work, user: @user, content: File.open("#{fixture_path}/files/image.png")) end - before do - allow(Hyrax::VirusCheckerService).to receive(:file_has_virus?) { false } - end - it 'will add correct extension when mimetype has a different extension' do allow(MimeTypeService).to receive(:label) { 'txt' } @@ -239,15 +284,12 @@ expect(response.headers['Content-Range']).to include 'bytes 0-19101/19102' end end + context 'with file set for download without file extension' do let(:file_set) do FactoryBot.create(:file_with_work, user: @user, content: File.open("#{fixture_path}/files/no_extension")) end - before do - allow(Hyrax::VirusCheckerService).to receive(:file_has_virus?) { false } - end - it 'will add extension added when vocab provides one' do allow(MimeTypeService).to receive(:label) { 'jpg' } @@ -265,4 +307,62 @@ end end end + + describe '#fetch_record' do + it 'fetches the record from Solr' do + expect(controller.send(:fetch_record)).to eq(mock_record) + end + end + + describe '#fetch_admin_set' do + it 'fetches the admin set from Solr' do + expect(controller.send(:fetch_admin_set)).to eq(mock_admin_set) + end + end + + describe '#admin_set_id' do + it 'returns the admin set id' do + expect(controller.send(:admin_set_id)).to eq('h128zk07m') + end + end + + describe '#record_id' do + it 'returns the record id' do + expect(controller.send(:record_id)).to eq('1z40m031g') + end + + it 'returns Unknown if the record is blank' do + allow(controller).to receive(:fetch_record).and_return([]) + expect(controller.send(:record_id)).to eq('Unknown') + end + end + + describe '#fileset_id' do + it 'returns the fileset id from params' do + controller.params = { id: file_set.id } + expect(controller.send(:fileset_id)).to eq(file_set.id) + end + + it 'returns Unknown if params id is missing' do + controller.params = {} + expect(controller.send(:fileset_id)).to eq('Unknown') + end + end + + describe '#record_title' do + it 'returns the record title' do + expect(controller.send(:record_title)).to eq('Key ethical issues discussed at CDC-sponsored international, regional meetings to explore cultural perspectives and contexts on pandemic influenza preparedness and response') + end + + it 'returns Unknown if the record title is blank' do + allow(controller).to receive(:fetch_record).and_return([{ 'title_tesim' => nil }]) + expect(controller.send(:record_title)).to eq('Unknown') + end + end + + describe '#site_id' do + it 'returns the site id from ENV' do + expect(controller.send(:site_id)).to eq('5') + end + end end diff --git a/spec/factories/hyc_download_stats.rb b/spec/factories/hyc_download_stats.rb new file mode 100644 index 000000000..7bccba284 --- /dev/null +++ b/spec/factories/hyc_download_stats.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true +# spec/factories/hyc_download_stats.rb +FactoryBot.define do + factory :hyc_download_stat do + sequence(:fileset_id) { |n| n.to_s } + sequence(:work_id) { |n| n.to_s } + sequence(:admin_set_id) { |n| n.to_s } + sequence(:work_type) { |n| n.to_s } + date { Date.today.beginning_of_month } + download_count { 0 } + end +end diff --git a/spec/models/hyc_download_stat_spec.rb b/spec/models/hyc_download_stat_spec.rb new file mode 100644 index 000000000..c7315dccf --- /dev/null +++ b/spec/models/hyc_download_stat_spec.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true +require 'rails_helper' + +RSpec.describe HycDownloadStat, type: :model do + describe 'scopes' do + let!(:stat1) { FactoryBot.create(:hyc_download_stat, fileset_id: 'fs1', work_id: 'w1', admin_set_id: 'as1', work_type: 'Article', date: '2024-07-01', download_count: 10) } + let!(:stat2) { FactoryBot.create(:hyc_download_stat, fileset_id: 'fs2', work_id: 'w2', admin_set_id: 'as2', work_type: 'Dataset', date: '2024-07-01', download_count: 5) } + let!(:stat3) { FactoryBot.create(:hyc_download_stat, fileset_id: 'fs1', work_id: 'w1', admin_set_id: 'as1', work_type: 'Article', date: '2024-08-01', download_count: 20) } + + it 'returns records with a specific fileset_id and date range' do + expect(HycDownloadStat.with_fileset_id_and_date('fs1', '2024-07-01', '2024-07-31')).to include(stat1) + expect(HycDownloadStat.with_fileset_id_and_date('fs1', '2024-07-01', '2024-07-31')).not_to include(stat3) + end + + it 'returns records with a specific work_id and date range' do + expect(HycDownloadStat.with_work_id_and_date('w1', '2024-07-01', '2024-07-31')).to include(stat1) + expect(HycDownloadStat.with_work_id_and_date('w1', '2024-07-01', '2024-07-31')).not_to include(stat3) + end + + it 'returns records with a specific admin_set_id' do + expect(HycDownloadStat.with_admin_set_id('as1')).to include(stat1, stat3) + expect(HycDownloadStat.with_admin_set_id('as1')).not_to include(stat2) + end + + it 'returns records with a specific work_type' do + expect(HycDownloadStat.with_work_type('Article')).to include(stat1, stat3) + expect(HycDownloadStat.with_work_type('Article')).not_to include(stat2) + end + + it 'returns records with a specific fileset_id' do + expect(HycDownloadStat.with_fileset_id('fs1')).to include(stat1, stat3) + expect(HycDownloadStat.with_fileset_id('fs1')).not_to include(stat2) + end + + it 'returns records with a specific work_id' do + expect(HycDownloadStat.with_work_id('w1')).to include(stat1, stat3) + expect(HycDownloadStat.with_work_id('w1')).not_to include(stat2) + end + + it 'returns records within a specific date range' do + expect(HycDownloadStat.within_date_range('2024-07-01', '2024-07-31')).to include(stat1, stat2) + expect(HycDownloadStat.within_date_range('2024-07-01', '2024-07-31')).not_to include(stat3) + end + end +end