From bf356b9774168774ecd4c8e6e2b4c5b3580d6989 Mon Sep 17 00:00:00 2001 From: Daniel Schmidt Date: Fri, 22 Dec 2023 08:59:23 -0800 Subject: [PATCH] DEV-429: Fix GeoServer references re-writing - Fixes how GeoServer reference URLs are constructed. We now pass only the root path to the geoserver, omitting the /rest/ suffix pointing to the API. It's assumed that the API lives at /rest/ and the web service URLs live at /wms/ and /wfs/ - Refactors the GeoserverPublisher to incorporate much of the logic currently present in Cli or ImportUtil. The test suite has been updated and fleshed out accordingly. more stash more fixes gets suite passing --- Gemfile | 2 + Gemfile.lock | 6 ++ docker-compose.yml | 6 +- lib/gingr/cli.rb | 52 +++++------- lib/gingr/config.rb | 10 ++- lib/gingr/data_handler.rb | 2 + lib/gingr/geoserver_publisher.rb | 115 ++++++++++++++++++------- lib/gingr/import_util.rb | 49 +++-------- lib/gingr/solr_indexer.rb | 23 +++-- spec/fixture/.DS_Store | Bin 6148 -> 8196 bytes spec/fixture/zipfile/.DS_Store | Bin 6148 -> 6148 bytes spec/geoserver_publisher_spec.rb | 139 +++++++++++++++++++++++++++++++ spec/import_util_spec.rb | 18 ++-- spec/solr_indexer_spec.rb | 6 +- 14 files changed, 302 insertions(+), 126 deletions(-) create mode 100644 spec/geoserver_publisher_spec.rb diff --git a/Gemfile b/Gemfile index 891f47d..01dd72f 100644 --- a/Gemfile +++ b/Gemfile @@ -16,3 +16,5 @@ group :test do end gem "listen", "~> 3.8" + +gem "pry", "~> 0.14.2" diff --git a/Gemfile.lock b/Gemfile.lock index a28ef1b..70b2b2f 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -39,6 +39,7 @@ GEM ougai (~> 1.8) bigdecimal (3.1.4) builder (3.2.4) + coderay (1.1.3) colorize (0.8.1) concurrent-ruby (1.2.2) connection_pool (2.4.1) @@ -89,6 +90,7 @@ GEM loofah (2.21.4) crass (~> 1.0.2) nokogiri (>= 1.12.0) + method_source (1.0.0) minitest (5.20.0) mutex_m (0.1.2) net-http-persistent (4.0.2) @@ -102,6 +104,9 @@ GEM oj (3.16.1) ougai (1.9.1) oj (~> 3.10) + pry (0.14.2) + coderay (~> 1.1) + method_source (~> 1.0) psych (5.1.1.1) stringio public_suffix (5.0.3) @@ -181,6 +186,7 @@ DEPENDENCIES geo_combine geoserver-publish (~> 0.7.0) listen (~> 3.8) + pry (~> 0.14.2) rsolr rspec (~> 3.12) rubyzip diff --git a/docker-compose.yml b/docker-compose.yml index 32682cf..c164b85 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,7 +12,7 @@ services: environment: SPATIAL_URL: http://spatial GEOSERVER_ROOT: data/geoserver/ - GEOSERVER_SECURE_URL: http://admin:geoserver@geoserver_secure:8080/geoserver/rest/ + GEOSERVER_SECURE_URL: http://admin:geoserver@geoserver-secure:8080/geoserver/rest/ GEOSERVER_URL: http://admin:geoserver@geoserver:8080/geoserver/rest/ SOLR_URL: http://solr:8983/solr/geodata-test SPATIAL_ROOT: data/spatial/ @@ -35,7 +35,7 @@ services: ports: - 80:80 volumes: - - ./data/spatial:/usr/local/apache2/htdocs/ + - ./data/spatial:/usr/local/apache2/htdocs/:ro geoserver: image: containers.lib.berkeley.edu/gis/geoserver/v2.23.2 @@ -44,7 +44,7 @@ services: volumes: - ./data/geoserver/public:/srv/geofiles:delegated - geoserver_secure: + geoserver-secure: image: containers.lib.berkeley.edu/gis/geoserver/v2.23.2 ports: - 8081:8080 diff --git a/lib/gingr/cli.rb b/lib/gingr/cli.rb index 5ff147e..27cb250 100644 --- a/lib/gingr/cli.rb +++ b/lib/gingr/cli.rb @@ -13,13 +13,19 @@ class Cli < Thor Thor.check_unknown_options! + class << self + def exit_on_failure? + true + end + end + desc 'watch', 'Watches a Gingr directory for files ready to be processed' long_desc <<-TEXT, wrapping: false EXAMPLES gingr watch data/gingr --solr-url=https://foo:bar@solr.lib.berkeley.edu:8983/solr/geodata ... TEXT option :solr_url - option :update_reference_field, type: :boolean, default: false + option :update_reference_field, type: :boolean, default: true option :spatial_root option :spatial_url option :geoserver_root @@ -42,7 +48,7 @@ def watch(root_dir = nil) option :spatial_url option :geoserver_url option :geoserver_secure_url - option :update_reference_field, type: :boolean, default: false + option :update_reference_field, type: :boolean, default: true option :solr_url def solr(directory) reference_urls = ImportUtil.get_reference_urls(options) @@ -50,7 +56,7 @@ def solr(directory) solr.index_directory(directory) end - desc 'geoserver', 'publish a giving shapefile or GeoTIFF file to a geoserver' + desc 'geoserver', 'publish a given shapefile or GeoTIFF file to a geoserver' long_desc <<-TEXT, wrapping: false examples: \n 1) ruby bin/import geoserver fk4cr7f93g.shp \n @@ -60,20 +66,13 @@ def solr(directory) option :is_public, type: :boolean, default: true def geoserver(filename) url = options[:geoserver_url] - url ||= if options[:is_public] - ENV.fetch('GEOSERVER_URL', Config.default_options[:geoserver_url]) - else - ENV.fetch( - 'GEOSERVER_SECURE_URL', Config.default_options[:geoserver_secure_url] - ) - end - publisher = GeoserverPublisher.new(url) - publisher.update(filename) - logger.info("'#{filename}' - published to geoserver #{url} successfully") + default = options[:is_public] ? :geoserver_url : :geoserver_secure_url + publisher = GeoserverPublisher.new(url, default:) + publisher.publish(filename) end desc 'unpack', - 'unpack a giving zip file, move shapefiles and GeoTIFF files to geoserver_root, other files to spatial_root' + 'unpack a given zip file, move shapefiles and GeoTIFF files to geoserver_root, other files to spatial_root' long_desc <<-TEXT, wrapping: false * When giving a zip file without path, it will look for a zip file under /app/import/ TEXT @@ -92,7 +91,7 @@ def unpack(zipfile) end desc 'all', - 'unpack a giving zip file, move files, index json files to solr and publish geofiles to geoservers' + 'unpack a given zip file, move files, index json files to solr and publish geofiles to geoservers' long_desc <<-TEXT, wrapping: false 1) move all geofiles to geoserver_root \n 2) move all data.zip, ISO19139.xml and document files to spatial_root \n @@ -111,7 +110,8 @@ def all(zipfile) solr(unpacked[:extract_to_path]) geofile_names = unpacked[:geofile_name_hash] - ImportUtil.publish_geoservers(geofile_names, options) + geoserver_urls = options.slice(:geoserver_url, :geoserver_secure_url).transform_keys(&:to_sym) + Gingr::GeoserverPublisher.publish_inventory(geofile_names, **geoserver_urls) logger.info("#{zipfile} - all imported") end @@ -121,22 +121,10 @@ def all(zipfile) LONGDESC option :geoserver_url option :is_public, type: :boolean, default: true - def geoserver_workspace(name) - url = options[:geoserver_url] - url ||= if options[:is_public] - ENV.fetch('GEOSERVER_URL', Config.default_options[:geoserver_url]) - else - ENV.fetch( - 'GEOSERVER_SECURE_URL', Config.default_options[:geoserver_secure_url] - ) - end - publisher = GeoserverPublisher.new(url) - publisher.create_workspace(name) - logger.info("geoserver workspace '#{name}' - created successfully") - end - - def self.exit_on_failure? - true + def geoserver_workspace(workspace_name = nil) + default = options[:is_public] ? :geoserver_url : :geoserver_secure_url + publisher = GeoserverPublisher.new(options[:geoserver_url], default:, workspace_name:) + publisher.create_workspace end end end diff --git a/lib/gingr/config.rb b/lib/gingr/config.rb index 98f5b08..fc63fe7 100644 --- a/lib/gingr/config.rb +++ b/lib/gingr/config.rb @@ -4,7 +4,7 @@ module Gingr module Config # value: urls populated for reference field in pre-ingestion tool @reference_urls = { - geoserver_secure_url: 'https://geoservices-secure.lib.berkeley.edu/', + geoserver_secure_url: 'https://geoservices-secure.lib.berkeley.edu/geoserver/', geoserver_url: 'https://geoservices.lib.berkeley.edu/geoserver/', spatial_url: 'https://spatial.lib.berkeley.edu/' } @@ -14,8 +14,8 @@ module Config # default options for commands @default_options = { - geoserver_secure_url: 'http://admin:geoserver@geoserver_secure:8080/geoserver/rest/', - geoserver_url: 'http://admin:geoserver@geoserver:8080/geoserver/rest/', + geoserver_secure_url: 'http://admin:geoserver@geoserver-secure:8080/geoserver/', + geoserver_url: 'http://admin:geoserver@geoserver:8080/geoserver/', spatial_url: 'https://spatial.lib.berkeley.edu', spatial_root: 'data/spatial/', geoserver_root: 'data/geoserver/', @@ -27,6 +27,10 @@ class << self attr_accessor :geofile_ingestion_dirname, :reference_urls, :default_options include Config + + def getopt(optname) + ENV[optname.upcase.to_s] || default_options[optname.downcase.to_sym] + end end end end diff --git a/lib/gingr/data_handler.rb b/lib/gingr/data_handler.rb index 5223959..222e624 100644 --- a/lib/gingr/data_handler.rb +++ b/lib/gingr/data_handler.rb @@ -15,6 +15,8 @@ module DataHandler @processing_root = '' class << self + include Logging + attr_accessor :spatial_root, :geoserver_root, :processing_root def extract_and_move(zip_file) diff --git a/lib/gingr/geoserver_publisher.rb b/lib/gingr/geoserver_publisher.rb index 77a8e64..58af918 100644 --- a/lib/gingr/geoserver_publisher.rb +++ b/lib/gingr/geoserver_publisher.rb @@ -1,55 +1,110 @@ # frozen_string_literal: true require 'geoserver/publish' require 'uri' +require_relative 'config' require_relative 'logging' module Gingr class GeoserverPublisher include Logging - def initialize(url) - uri = URI(url) - @conn = Geoserver::Publish::Connection.new({ - 'url' => rest_url(uri), - 'user' => uri.user, - 'password' => uri.password.to_s, - }) - end + DEFAULT_REMOTE_ROOT = '/srv/geofiles' + DEFAULT_WORKSPACE = 'UCB' + + attr_reader :connection + attr_reader :remote_root + attr_reader :workspace_name + + class << self + def publish_inventory(inventory, geoserver_url: nil, geoserver_secure_url: nil) + if !inventory[:public].empty? + public_publisher = new(geoserver_url) + public_publisher.batch_publish(inventory[:public]) + end + + if !inventory[:ucb].empty? + secure_publisher = new(geoserver_secure_url, default: :geoserver_secure_url) + secure_publisher.batch_publish(inventory[:ucb]) + end + end - def update(filename) - name = File.basename(filename, '.*') - filepath = "file:///srv/geofiles/berkeley-#{name}/#{filename}" - File.extname(filename).downcase == '.shp' ? publish_shapefile(filepath, name) : publish_geotiff(filepath, name) - rescue Geoserver::Publish::Error => e - logger.error("Publish Geoserver error: #{filename} -- #{e.inspect}") - raise + def parse_connection_string(geoserver_baseurl) + uri = URI.parse(geoserver_baseurl) + uri.path << '/' unless uri.path.end_with? '/' + uri.path << 'rest/' unless uri.path.end_with? 'rest/' + + return URI::Generic.build( + scheme: uri.scheme, + host: uri.host, + port: uri.port == uri.default_port ? nil : uri.port, + path: uri.path, + fragment: uri.fragment, + query: uri.query, + ).to_s, uri.user, uri.password + end end - def publish_shapefile(filepath, name) - Geoserver::Publish.shapefile(connection: @conn, workspace_name: 'UCB', file_path: filepath, - id: name, title: name) + def initialize(conn = nil, default: nil, remote_root: nil, workspace_name: nil) + conn ||= Gingr::Config.getopt(default || :geoserver_url) + + # Coerce a connection string into an actual connection object + if conn.kind_of? String + rest_url, user, password = self.class.parse_connection_string(conn) + conn = Geoserver::Publish::Connection.new({ + 'url' => rest_url, + 'user' => user, + 'password' => password, + }) + end + + @connection = conn + @remote_root = (remote_root || DEFAULT_REMOTE_ROOT).chomp '/' + @workspace_name = workspace_name || DEFAULT_WORKSPACE end - def publish_geotiff(filepath, name) - Geoserver::Publish.geotiff(connection: @conn, workspace_name: 'UCB', file_path: filepath, id: name, - title: name) + def batch_publish(filenames) + filenames.each(&method(:publish)) end - def batch_update(filename_list) - filename_list.each { |filename| update(filename) } + def publish(filename) + id = File.basename(filename, '.*') + file_path = remote_filepath(id, filename) + if File.extname(filename).casecmp('.shp') + publish_shapefile(file_path, id) + else + publish_geotiff(file_path, id) + end end - def create_workspace(name) - workspace = Geoserver::Publish::Workspace.new(@conn) - workspace.create(workspace_name: name) + def create_workspace + logger.info("Creating workspace #{workspace_name} in #{geoserver_url}") + + workspace = Geoserver::Publish::Workspace.new(connection) + if workspace.find(workspace_name:) + logger.debug("Workspace #{workspace_name} already exists") + else + workspace.create(workspace_name:) + end end private - def rest_url(uri) - uri_port = uri.port.to_s - port = uri_port.start_with?('80') ? ":#{uri_port}" : '' - "#{uri.scheme}://#{uri.host}#{port}#{uri.path}" + def publish_shapefile(file_path, id) + logger.debug("Publishing shapefile #{id} to #{geoserver_url}") + Geoserver::Publish.shapefile(connection:, workspace_name:, file_path:, id:, title: id) + end + + def publish_geotiff(file_path, id) + logger.debug("Publishing geotiff #{id} to #{geoserver_url}") + Geoserver::Publish.geotiff(connection:, workspace_name:, file_path:, id:, title: id) + end + + def remote_filepath(id, filename) + "file://#{remote_root}/berkeley-#{id}/#{filename}" + end + + def geoserver_url + connection.config['url'] end end end diff --git a/lib/gingr/import_util.rb b/lib/gingr/import_util.rb index 980fe74..0e4b00a 100644 --- a/lib/gingr/import_util.rb +++ b/lib/gingr/import_util.rb @@ -13,16 +13,11 @@ module ImportUtil include Logging class << self - def publish_geoservers(geofile_names, options) - publish_geoserver_files(geofile_names[:public], options[:geoserver_url], true) - publish_geoserver_files(geofile_names[:ucb], options[:geoserver_secure_url], false) - end - def get_reference_urls(options) {}.tap do |refs| if options[:update_reference_field] - Config.reference_urls.each_key do |key| - refs[key] = reference_url(key, options) + Config.reference_urls.keys.map do |k| + refs[k] = reference_uri(k, options).to_s end end end @@ -34,40 +29,18 @@ def root_path private - def publish_geoserver_files(files, url, is_public) - return if files.empty? - - url ||= if is_public - ENV.fetch('GEOSERVER_URL', - Config.default_options[:geoserver_url]) - else - ENV.fetch('GEOSERVER_SECURE_URL', - Config.default_options[:geoserver_secure_url]) - end - publisher = GeoserverPublisher.new(url) - publisher.batch_update(files) - end - - def geo_url(url) - uri = URI(url) - uri_port = uri.port.to_s - port = uri_port.start_with?('80') ? ":#{uri_port}" : '' - "#{uri.scheme}://#{uri.host}#{port}" + def reference_uri(key, options) + prepare_url options[key] || Gingr::Config.getopt(key) end - def add_trailing_slash(url) - original_uri = URI.parse(url) - original_uri.path += '/' unless original_uri.path.end_with?('/') - original_uri - end - - def reference_url(key, options) - default_option_value = Config.default_options[key] - new_url = options[key] || ENV.fetch(key.to_s.upcase, default_option_value) - new_url = geo_url(new_url) if %w[geoserver_url geoserver_secure_url].include?(key.to_s) - add_trailing_slash(new_url).to_s + def prepare_url(url) + URI.parse(url).tap do |uri| + uri.user = nil + uri.password = nil + uri.path << '/' unless uri.path.end_with? '/' + uri.path.chomp! 'rest/' + end end - end end end diff --git a/lib/gingr/solr_indexer.rb b/lib/gingr/solr_indexer.rb index 9d9a97c..2240e0e 100644 --- a/lib/gingr/solr_indexer.rb +++ b/lib/gingr/solr_indexer.rb @@ -6,20 +6,26 @@ module Gingr class SolrIndexer - include Config + include Logging attr_accessor :reference_urls attr_accessor :solr - def initialize(solr = nil, reference_urls = nil) - solr ||= ENV['SOLR_URL'] || Gingr::Config.default_options[:solr_url] - solr = RSolr.connect url: solr, adapter: :net_http_persistent if solr.kind_of? String - @solr = solr - @reference_urls = reference_urls || {} + def initialize(connection = nil, refurls = nil) + connection ||= Gingr::Config.getopt(:solr_url) + connection = RSolr.connect url: connection, adapter: :net_http_persistent if connection.kind_of? String + @solr = connection + + # Strip HTTP Basic Auth + @reference_urls = (refurls || {}).transform_values do |url| + URI(url).tap { |uri| uri.password = uri.user = nil }.to_s + end end def add(doc) doc = JSON.load_file(doc) if doc.kind_of? String + + logger.debug("Indexing document: #{doc['id']}") update_reference_urls!(doc) @solr.add doc end @@ -33,7 +39,10 @@ def index_directory(directory) def update_reference_urls!(doc) Gingr::Config.reference_urls.each do |name, from_url| to_url = @reference_urls[name] - doc['dct_references_s'].gsub!(from_url, to_url) if to_url + if to_url + logger.debug("Updating dct_references_s from #{from_url} to #{to_url}") + doc['dct_references_s'].gsub!(from_url, to_url) + end end end diff --git a/spec/fixture/.DS_Store b/spec/fixture/.DS_Store index 24d04bb56f5edcc55348d25d098a3d225a75d4cf..4bf405079c77581bca341b5ebf85979f8b7512a8 100644 GIT binary patch literal 8196 zcmeHMU2GIp6u#e3=*&RdTZC!5P+h6vA1omM5Vn6{`4PgFwv^(syEDLq=}g&~-6907 zAqI>@qtQn&MvcB;!UH_uPc$)sSIx$VCPs~t7>)9%Pk8R!S)?sIkr)!@Cik9u&(F*~ z=YD5q&n#mM?L~7LV~vb4mAasODh+pOLN5A*rbL`7q9A+5Z00a4H{uWHoC(@7BaT2E zfj9zj1mXz95qJiLsPn@tR4`VA}TQ;%}JgV>O@l!9hFp?Ge~oW=+1~L6ohvtyJV=& zkdibW;|RnNm>2<(KGp0vRqn&=;GOlmmm|sL+^}Q&L2K(bP$pGPo>Hw=t5ene>BHVg zI>-h^zdc*nCssY4=j1B$Y_8X~MlyUxhvx=a+qDXzfo)o(GT3LkrZ>{=6f}jHo!Ro_`rVl$&XDV!k!~P*6tJ3cZoNFw zC&w>!eX5b}(R(v|dWkl^b2zX)*YC9oKG6)X^U1;PyyxyJSVG>H z_nhL8>t}d(D-$HZ$>9%lswFS8r(R>fTYVsjcJn({+7-qT~klLE9?q8L`da@ZPMSw_MY9 z5A~I7-_AK!kDDE`_-~q4=CyS-MuX9$@)otfn6r--?clj`OE zig+HUaR#s8EZ)L{!ykWHAG94b7G4oN8BdgY@ZsSP; zu!6ZeaU8;T%-M$U4bx>D?#f0(V^cz(-CW73|FTSG-5$cdbxalD<<*h)<8xHPJ)ayc z=gy<$Ha?}?)YxDwSeVec!Y`>>DAg0K%QVU)J}Fu*TuDpFg}!#3Mmfc+%0^>DlfF^k zq)|3=Ca;_5=u{~!_?&|1O^7{!Y?g_rOuUc>8n18?FS&f{&ogLetP7jOk1 zk3;3wv8cqC6H!@87hK1450RvT<*`Qb>OLbdN!mAx`oHVm-~aFPF0pwWfj9yWas*J- zneJ?-9p39XtB@>7{Qz~*gx;v6p$QE_MexINoapU83~8Q>WT}XbN=g!H|NDmk@uN3> N|HtqD5Pc8t@Gp9l9XbF2 delta 178 zcmZp1XfcprU|?W$DortDU=RQ@Ie-{MGjdEU6q~50$jCG?VE1GL8J5ZX0#b}SCu<25 zYDiXB8ygwvC>WX>n$_wkR9hMu=qQ*No7L8Ga)>JHTL;Bw=j7()cTYYbAkWydd9qju zwxd G!wdk~$&kzt j&rk#vF9P#Hs!}GaG0KysWAj4BG`5KilAGB%{_+C=BnKTT delta 138 zcmZoMXfc@J&&|TXz`)4BAi%&7KQU^fG>FU1P{NSPPz)sF844Im8Il-s7%~}>C)+T} v8{m*jDNfEw%FoXM>SkbLC