From 5810b3f3736de1002f15b5452a959bfbf0720346 Mon Sep 17 00:00:00 2001 From: Rob Brackett Date: Fri, 23 Feb 2018 13:31:00 -0800 Subject: [PATCH 1/2] Add rake task for importing annotations Use the `import_annotations_from_sheet` rake task to import all the annotations an analyst has created in a given Google Sheet. This can be used to solve #61. Arguments are: 1. Google sheet ID, e.g. 1-Rq-AclS2GI_yxLmkYVY7FvTfN21KoJtxXtOXXXXXX 2. E-mail of user to attribute the annotation to 3. (optional) Names of spreadsheet tabs to import (comma-separated). If unset, all tabs will be imported. 4. (optional) Row to start at (defaults to 7) 5. (optional) Row to end at. If unset, reads all rows. When done, it'll output summary information of how many rows were added, skipped, or errored across how many tabs. --- .env.example | 4 + Gemfile | 1 + Gemfile.lock | 39 +++++++ lib/tasks/import_from_sheets.rake | 171 ++++++++++++++++++++++++++++++ 4 files changed, 215 insertions(+) create mode 100644 lib/tasks/import_from_sheets.rake diff --git a/.env.example b/.env.example index a5a57253..7d4f64b2 100644 --- a/.env.example +++ b/.env.example @@ -53,3 +53,7 @@ TOKEN_PRIVATE_KEY='MIIEogIBAAKCAQEAufNrDQRl6Gj1yuga0DVHeJ4fi+lNWtn4S8XRU8/nBwm9v # In production, set up Sentry.io for error tracking # SENTRY_DSN= + +# Set these if you are running rake tasks to import data from Google Sheets +# GOOGLE_CLIENT_ID=XYZ +# GOOGLE_CLIENT_SECRET=XYZ diff --git a/Gemfile b/Gemfile index 487e2e20..72e9e289 100644 --- a/Gemfile +++ b/Gemfile @@ -22,6 +22,7 @@ gem 'oj', '~> 3.4' gem 'sentry-raven' gem 'readthis' gem 'hiredis' +gem 'google-api-client' # See https://github.com/rails/execjs#readme for more supported runtimes # gem 'therubyracer', platforms: :ruby diff --git a/Gemfile.lock b/Gemfile.lock index 47f2733e..6b2da55e 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -65,6 +65,8 @@ GEM crack (0.4.3) safe_yaml (~> 1.0.0) crass (1.0.3) + declarative (0.0.10) + declarative-option (0.1.0) devise (4.4.1) bcrypt (~> 3.0) 
orm_adapter (~> 0.1) @@ -82,10 +84,26 @@ GEM ffi (1.9.21) globalid (0.4.1) activesupport (>= 4.2.0) + google-api-client (0.19.8) + addressable (~> 2.5, >= 2.5.1) + googleauth (>= 0.5, < 0.7.0) + httpclient (>= 2.8.1, < 3.0) + mime-types (~> 3.0) + representable (~> 3.0) + retriable (>= 2.0, < 4.0) + googleauth (0.6.2) + faraday (~> 0.12) + jwt (>= 1.4, < 3.0) + logging (~> 2.0) + memoist (~> 0.12) + multi_json (~> 1.11) + os (~> 0.9) + signet (~> 0.7) hashdiff (0.3.7) hiredis (0.6.1) httparty (0.16.0) multi_xml (>= 0.5.2) + httpclient (2.8.3) i18n (0.9.5) concurrent-ruby (~> 1.0) jmespath (1.3.1) @@ -95,12 +113,20 @@ GEM rb-fsevent (~> 0.9, >= 0.9.4) rb-inotify (~> 0.9, >= 0.9.7) ruby_dep (~> 1.2) + little-plugger (1.1.4) + logging (2.2.2) + little-plugger (~> 1.1) + multi_json (~> 1.10) loofah (2.2.0) crass (~> 1.0.2) nokogiri (>= 1.5.9) mail (2.7.0) mini_mime (>= 0.1.1) + memoist (0.16.0) method_source (0.9.0) + mime-types (3.1) + mime-types-data (~> 3.2015) + mime-types-data (3.2016.0521) mini_mime (1.0.0) mini_portile2 (2.3.0) minitest (5.11.3) @@ -115,6 +141,7 @@ GEM mini_portile2 (~> 2.3.0) oj (3.4.0) orm_adapter (0.5.0) + os (0.9.6) parallel (1.12.1) parser (2.5.0.0) ast (~> 2.4.0) @@ -173,6 +200,10 @@ GEM redis (4.0.1) redis-namespace (1.6.0) redis (>= 3.0.4) + representable (3.0.4) + declarative (< 0.1.0) + declarative-option (< 0.2.0) + uber (< 0.2.0) responders (2.4.0) actionpack (>= 4.2.0, < 5.3) railties (>= 4.2.0, < 5.3) @@ -182,6 +213,7 @@ GEM redis-namespace (~> 1.3) sinatra (>= 0.9.2) vegas (~> 0.1.2) + retriable (3.1.1) rubocop (0.52.1) parallel (~> 1.10) parser (>= 2.4.0.2, < 3.0) @@ -205,6 +237,11 @@ GEM tilt (>= 1.1, < 3) sentry-raven (2.7.2) faraday (>= 0.7.6, < 1.0) + signet (0.8.1) + addressable (~> 2.3) + faraday (~> 0.9) + jwt (>= 1.5, < 3.0) + multi_json (~> 1.10) sinatra (2.0.1) mustermann (~> 1.0) rack (~> 2.0) @@ -227,6 +264,7 @@ GEM tilt (2.0.8) tzinfo (1.2.5) thread_safe (~> 0.1) + uber (0.1.0) uglifier (4.1.6) execjs (>= 0.3.0, < 3) 
unicode-display_width (1.3.0) @@ -255,6 +293,7 @@ DEPENDENCIES byebug devise dotenv-rails + google-api-client hiredis httparty jwt (~> 2.1) diff --git a/lib/tasks/import_from_sheets.rake b/lib/tasks/import_from_sheets.rake new file mode 100644 index 00000000..21b0040e --- /dev/null +++ b/lib/tasks/import_from_sheets.rake @@ -0,0 +1,171 @@ +require 'google/apis/sheets_v4' +require 'googleauth' +require 'googleauth/stores/file_token_store' + +IMPORT_TYPE = 'rake_task_v1' +OOB_URI = 'urn:ietf:wg:oauth:2.0:oob' +APPLICATION_NAME = 'Web Monitoring DB Importer' + + +desc 'Create annotations from data in analysts’ Google sheets -- only the sheet ID and user e-mail are required.' +task :import_annotations_from_sheet, [:sheet_id, :user_email, :tabs, :start_row, :end_row] => [:environment] do |_t, args| + + verbose = ENV['VERBOSE'] + client = get_client + sheet_id = args[:sheet_id] + start_row = args.fetch(:start_row, 7).to_i + end_row = args[:end_row] || '' + + user = User.find_by!(email: args[:user_email]) + + tab_count = 0 + annotated_count = 0 + skipped_count = 0 + error_count = 0 + + tabs = + if args[:tabs] + tabs = args[:tabs].split(',').collect {|name| name.strip} + else + client.get_spreadsheet(sheet_id).sheets.collect do |sheet| + sheet.properties.title + end + end + + begin + tabs.each do |tab_title| + puts "Importing spreadsheet tab '#{tab_title}'" + + rows = client.get_spreadsheet_values( + sheet_id, + "#{tab_title}!A#{start_row}:AL#{end_row}" + ).values + + rows.each_with_index do |row, index| + # Column 9 is latest-to-base + begin + change = change_for_version_url(row[9]) + rescue StandardError => error + puts "Row #{start_row + index}: #{error.message}" + error_count += 1 + end + next unless change + + change.annotate(annotation_data_for_row(row), user) + annotated_count += 1 + + puts "Annotated '#{change.version.page.url}' change '#{change.api_id}'" if verbose + end + + tab_count += 1 + end + ensure + puts "\nRESULTS:" + puts "--------" + puts "Created 
#{annotated_count} annotations" + puts "Skipped #{skipped_count} rows" + puts "Errored #{error_count} rows" + puts "In #{tab_count} spreadsheet tabs" + puts "" + end +end + +def change_for_version_url(url) + return nil unless url.present? + + # Handle versionista URLs + match = /versionista\.com\/\d+\/\d+\/(\d+):(\d+)/.match(url) + if match + to_version = Version.find_by!( + "source_type = 'versionista' AND source_metadata->>'version_id' = ?", + match[1] + ) + from_version = Version.find_by!( + "source_type = 'versionista' AND source_metadata->>'version_id' = ?", + match[2] + ) + return Change.between(from: from_version, to: to_version, create: true) + end + + # Handle our URLs + match = /monitoring\.envirodatagov\.org\/page\/[^\/]+\/([^\/.]+)\.\.([^\/.]+)/.match(url) + if match + from_version = Version.find(match[1]) + to_version = Version.find(match[2]) + return Change.between(from: from_version, to: to_version, create: true) + end + + raise StandardError, "Unknown change URL format: '#{url}'" +end + +def annotation_data_for_row(row) + start_index = 17 + # fields from UI project + fields = [ + ['indiv_1', :boolean], + ['indiv_2', :boolean], + ['indiv_3', :boolean], + ['indiv_4', :boolean], + ['indiv_5', :boolean], + ['indiv_6', :boolean], + ['repeat_7', :boolean], + ['repeat_8', :boolean], + ['repeat_9', :boolean], + ['repeat_10', :boolean], + ['repeat_11', :boolean], + ['repeat_12', :boolean], + ['sig_1', :boolean], + ['sig_2', :boolean], + ['sig_3', :boolean], + ['sig_4', :boolean], + ['sig_5', :boolean], + ['sig_6', :boolean], + 'notes' + ] + + data = { _importer: IMPORT_TYPE } + fields.each_with_index do |field, index| + field_name, field_type = field.is_a?(Array) ? field : [field, :text] + + value = row[start_index + index] + value = value.present? 
if field_type == :boolean + + data[field_name] = value + end + + data +end + +def get_client + service = Google::Apis::SheetsV4::SheetsService.new + service.client_options.application_name = APPLICATION_NAME + service.authorization = authorize_google + service +end + +def authorize_google + unless ENV['GOOGLE_CLIENT_ID'] && ENV['GOOGLE_CLIENT_SECRET'] + raise "You must have both `GOOGLE_CLIENT_ID` and `GOOGLE_CLIENT_SECRET` environment variables set." + end + + scope = Google::Apis::SheetsV4::AUTH_SPREADSHEETS_READONLY + client_id = Google::Auth::ClientId.new( + ENV['GOOGLE_CLIENT_ID'], ENV['GOOGLE_CLIENT_SECRET']) + token_store = Google::Auth::Stores::FileTokenStore.new(file: Tempfile.new) + authorizer = Google::Auth::UserAuthorizer.new(client_id, scope, token_store) + + user_id = 'default' + credentials = authorizer.get_credentials(user_id) + if credentials.nil? + url = authorizer.get_authorization_url( + base_url: OOB_URI) + puts "Open the following URL in the browser and enter the " + + "resulting code after authorization:" + puts url + code = STDIN.gets.strip + credentials = authorizer.get_and_store_credentials_from_code( + user_id: user_id, code: code, base_url: OOB_URI) + end + + credentials +end From 3de03ad56e39d48bcafa29005adb3893f773f5ee Mon Sep 17 00:00:00 2001 From: Rob Brackett Date: Fri, 23 Feb 2018 13:47:00 -0800 Subject: [PATCH 2/2] Obey Rubocop --- lib/tasks/import_from_sheets.rake | 42 +++++++++++++++++-------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/lib/tasks/import_from_sheets.rake b/lib/tasks/import_from_sheets.rake index 21b0040e..fb50a601 100644 --- a/lib/tasks/import_from_sheets.rake +++ b/lib/tasks/import_from_sheets.rake @@ -2,19 +2,18 @@ require 'google/apis/sheets_v4' require 'googleauth' require 'googleauth/stores/file_token_store' -IMPORT_TYPE = 'rake_task_v1' -OOB_URI = 'urn:ietf:wg:oauth:2.0:oob' -APPLICATION_NAME = 'Web Monitoring DB Importer' +IMPORT_TYPE = 'rake_task_v1'.freeze +OOB_URI = 
'urn:ietf:wg:oauth:2.0:oob'.freeze +APPLICATION_NAME = 'Web Monitoring DB Importer'.freeze -desc 'Create annotations from data in analysts’ Google sheets -- only the sheet ID and user e-mail are required.' +desc 'Create annotations from data in analysts’ Google sheets -- only sheet ID & user e-mail are required.' task :import_annotations_from_sheet, [:sheet_id, :user_email, :tabs, :start_row, :end_row] => [:environment] do |_t, args| - verbose = ENV['VERBOSE'] - client = get_client sheet_id = args[:sheet_id] start_row = args.fetch(:start_row, 7).to_i end_row = args[:end_row] || '' + client = sheets_client user = User.find_by!(email: args[:user_email]) @@ -25,7 +24,7 @@ task :import_annotations_from_sheet, [:sheet_id, :user_email, :tabs, :start_row, tabs = if args[:tabs] - tabs = args[:tabs].split(',').collect {|name| name.strip} + args[:tabs].split(',').collect(&:strip) else client.get_spreadsheet(sheet_id).sheets.collect do |sheet| sheet.properties.title @@ -60,13 +59,14 @@ task :import_annotations_from_sheet, [:sheet_id, :user_email, :tabs, :start_row, tab_count += 1 end ensure - puts "\nRESULTS:" - puts "--------" + puts '' + puts 'RESULTS:' + puts '--------' puts "Created #{annotated_count} annotations" puts "Skipped #{skipped_count} rows" puts "Errored #{error_count} rows" puts "In #{tab_count} spreadsheet tabs" - puts "" + puts '' end end @@ -136,7 +136,7 @@ def annotation_data_for_row(row) data end -def get_client +def sheets_client service = Google::Apis::SheetsV4::SheetsService.new service.client_options.application_name = APPLICATION_NAME service.authorization = authorize_google @@ -145,26 +145,30 @@ end def authorize_google unless ENV['GOOGLE_CLIENT_ID'] && ENV['GOOGLE_CLIENT_SECRET'] - raise "You must have both `GOOGLE_CLIENT_ID` and `GOOGLE_CLIENT_SECRET` environment variables set." + raise 'You must have both `GOOGLE_CLIENT_ID` and `GOOGLE_CLIENT_SECRET` environment variables set.' 
end - scope = Google::Apis::SheetsV4::AUTH_SPREADSHEETS_READONLY client_id = Google::Auth::ClientId.new( - ENV['GOOGLE_CLIENT_ID'], ENV['GOOGLE_CLIENT_SECRET']) + ENV['GOOGLE_CLIENT_ID'], + ENV['GOOGLE_CLIENT_SECRET'] + ) + scope = Google::Apis::SheetsV4::AUTH_SPREADSHEETS_READONLY token_store = Google::Auth::Stores::FileTokenStore.new(file: Tempfile.new) authorizer = Google::Auth::UserAuthorizer.new(client_id, scope, token_store) user_id = 'default' credentials = authorizer.get_credentials(user_id) if credentials.nil? - url = authorizer.get_authorization_url( - base_url: OOB_URI) - puts "Open the following URL in the browser and enter the " + - "resulting code after authorization:" + url = authorizer.get_authorization_url(base_url: OOB_URI) + puts 'Open the following URL in your browser and enter the ' \ + 'resulting code after authorization:' puts url code = STDIN.gets.strip credentials = authorizer.get_and_store_credentials_from_code( - user_id: user_id, code: code, base_url: OOB_URI) + user_id: user_id, + code: code, + base_url: OOB_URI + ) end credentials