diff --git a/bin/gingr b/bin/gingr
index d0722a8..79275c9 100755
--- a/bin/gingr
+++ b/bin/gingr
@@ -1,115 +1,4 @@
 #!/usr/bin/env ruby
-require 'find'
-require 'json'
-require 'thor'
-require_relative '../lib/data_handler'
-require_relative '../lib/geoserver_publisher'
-require_relative '../lib/import_util'
-require_relative '../lib/solr_indexer'
+require_relative '../lib/gingr'
 
-# Gingr module
-module Gingr
-  # ingestion tasks
-  class Import < Thor
-    include Config
-    include ImportUtil
-
-    Thor.check_unknown_options!
-
-    desc 'solr',
-         'Giving a directory path, it will index all json files from the directory/sub-directory to solr'
-    long_desc <<-TEXT, wrapping: false
-      examples:\n
-      1) ruby bin/import solr tmp/test_public \n
-      2) ruby bin/import solr tmp/test_public --no-update_reference_field \n
-      (it will update reference urls from 'dct_references_s' field in each geoblacklight json file \n
-      with current download_url, geoserver_url, geoserver_secure_url)
-    TEXT
-    option :download_url
-    option :geoserver_url
-    option :geoserver_secure_url
-    option :update_reference_field, type: :boolean, default: false
-    option :solr_url
-    def solr(dir_path)
-      reference_urls = ImportUtil.get_reference_urls(options)
-      solr_url = options[:solr_url] || ENV.fetch('SOLR_URL', nil)
-      ImportUtil.index_solr_from_dir(dir_path, solr_url, reference_urls)
-      txt = "all json files under '#{dir_path}' and subdirectories have been indexed to solr #{solr_url} successfully"
-      Config.logger.info(txt)
-    end
-
-    desc 'geoserver', 'publish a giving shapefile or GeoTIFF file to a geoserver'
-    long_desc <<-TEXT, wrapping: false
-      examples: \n
-      1) ruby bin/import geoserver fk4cr7f93g.shp \n
-      2) ruby bin/import geoserver fk4h14n50v.shp --no-is-public
-    TEXT
-    option :geoserver_url
-    option :is_public, type: :boolean, default: true
-    def geoserver(filename)
-      url = options[:geoserver_url]
-      url ||= options[:is_public] ? ENV.fetch('GEOSERVER_URL', nil) : ENV.fetch('GEOSERVER_SECURE_URL', nil)
-      publisher = GeoserverPublisher.new(url)
-      publisher.update(filename)
-      Config.logger.info("'#{filename}' - published to geoserver #{url} successfully")
-    end
-
-    desc 'unpack',
-         'unpack a giving zip file, move shapefiles and GeoTIFF files to geoserver_root, other files to spatial_root'
-    long_desc <<-TEXT, wrapping: false
-      * When giving a zip file without path, it will look for a zip file under /app/import/
-    TEXT
-    option :spatial_root
-    option :geoserver_root
-    def unpack(zipfile)
-      zipfile_path = zipfile == File.basename(zipfile) ? File.join(ImportUtil.root_path, 'import', zipfile) : zipfile
-      DataHandler.spatial_root = options[:spatial_root] || ENV.fetch('SPATIAL_ROOT', nil)
-      DataHandler.geoserver_root = options[:geoserver_root] || ENV.fetch('GEOSERVER_ROOT', nil)
-
-      temp_path = File.join(Dir.pwd, 'tmp')
-      DataHandler.extract_and_move(zipfile_path, temp_path)
-    end
-
-    desc 'all',
-         'unpack a giving zip file, move files, index json files to solr and publish geofiles to geoservers'
-    long_desc <<-TEXT, wrapping: false
-      1) move all geofiles to geoserver_root \n
-      2) move all data.zip, ISO19139.xml and document files to spatial_root \n
-      2) index all geoblacklight json files to solr \n
-      3) publish all shapefiles and GeoTIFF files to geoserver \n
-    TEXT
-    option :solr_url
-    option :update_reference_field, type: :boolean, default: false
-    option :spatial_root
-    option :geoserver_root
-    option :geoserver_url
-    option :geoserver_secure_url
-    def all(zipfile)
-      unpacked = unpack(zipfile)
-      solr(unpacked[:extract_to_path])
-
-      geofile_names = unpacked[:geofile_name_hash]
-      ImportUtil.publish_geoservers(geofile_names, options)
-      Config.logger.info("#{zipfile} - all imported")
-    end
-
-    desc 'geoserver_workspace', 'create a workspace in a geoserver'
-    long_desc <<-LONGDESC
-      This is for spec test. Geodata website only needs one workspace "UCB"
-    LONGDESC
-    option :geoserver_url
-    option :is_public, type: :boolean, default: true
-    def geoserver_workspace(name)
-      url = options[:geoserver_url]
-      url ||= options[:is_public] ? ENV.fetch('GEOSERVER_URL', nil) : ENV.fetch('GEOSERVER_SECURE_URL', nil)
-      publisher = GeoserverPublisher.new(url)
-      publisher.create_workspace(name)
-      Config.logger.info("geoserver workspace '#{name}' - created successfully")
-    end
-
-    def self.exit_on_failure?
-      true
-    end
-  end
-end
-Gingr::Import.start(ARGV)
+Gingr::Cli.start(ARGV)
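The executable is now a thin wrapper: everything of substance moves into the library, and Thor does the dispatching. As a rough sketch of the pattern (not Gingr's actual commands), a Thor entry point maps each subcommand to a public method and parses flags from ARGV:

```ruby
#!/usr/bin/env ruby
# frozen_string_literal: true
require 'thor'

# Sketch of the Thor pattern the CLI relies on: `start` parses ARGV, so
# `demo greet world --shout` dispatches to Demo#greet("world").
class Demo < Thor
  desc 'greet NAME', 'print a greeting'
  option :shout, type: :boolean, default: false
  def greet(name)
    text = "Hello, #{name}!"
    puts options[:shout] ? text.upcase : text
  end

  # Return a non-zero exit status when a command fails, as Gingr::Cli does.
  def self.exit_on_failure?
    true
  end
end

Demo.start(ARGV)
```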
diff --git a/bin/gingr-examples.sh b/bin/gingr-examples.sh
index 6b80c94..2a69139 100755
--- a/bin/gingr-examples.sh
+++ b/bin/gingr-examples.sh
@@ -2,28 +2,28 @@
 
 # unpack zipfile
 echo "1 - unpack zipfile starting..."
-ruby bin/import unpack spec/fixture/zipfile/test_public.zip
+ruby bin/gingr unpack spec/fixture/zipfile/test_public.zip
 echo "unpack zipfile ends"
 
 # index solr
 echo "2 - index solr starting..."
-ruby bin/import solr tmp/test_public/
+ruby bin/gingr solr tmp/test_public/
 echo "index solr ends"
 
 # publish geoserver (need testing both public and restricted geoserver?)
 echo "3.1 - create geoserver workspace ..."
-ruby bin/import geoserver_workspace UCB
+ruby bin/gingr geoserver_workspace UCB
 echo "create create geoserver workspace ends"
 
 echo "3.2 - publish vector to geoserver starting..."
-ruby bin/import geoserver fk4cr7f93g.shp
+ruby bin/gingr geoserver fk4cr7f93g.shp
 echo "publish vector to geoserver ends"
 
 # echo "3.3 - publish raster to geoserver starting..."
-# ruby bin/import geoserver bbbb.TIFF
+# ruby bin/gingr geoserver bbbb.TIFF
 # echo "publish raster to geoserver ends"
 
 # import all with a zipfile
 echo "4 - import all starting..."
-ruby bin/import all spec/fixture/zipfile/test_public.zip
+ruby bin/gingr all spec/fixture/zipfile/test_public.zip
 echo "import all ends"
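Since the script only shells out to the renamed binary, the same smoke test can be driven from Ruby by handing argv arrays to the Thor class. A minimal sketch, assuming the same fixtures and reachable Solr/GeoServer instances configured via the usual environment variables:

```ruby
# Sketch: drive the smoke-test sequence without the shell wrapper.
require_relative '../lib/gingr'

%w[SOLR_URL GEOSERVER_URL].each { |k| abort "set #{k}" unless ENV[k] }

Gingr::Cli.start(%w[unpack spec/fixture/zipfile/test_public.zip])
Gingr::Cli.start(%w[solr tmp/test_public/])
Gingr::Cli.start(%w[geoserver_workspace UCB])
Gingr::Cli.start(%w[geoserver fk4cr7f93g.shp])
```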
diff --git a/lib/gingr.rb b/lib/gingr.rb
new file mode 100644
index 0000000..bb26ca8
--- /dev/null
+++ b/lib/gingr.rb
@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+# monkey-patch first
+require_relative 'monkeypatch/geoserver/publish'
+
+require_relative 'gingr/cli'
+require_relative 'gingr/config'
+require_relative 'gingr/data_handler'
+require_relative 'gingr/geoserver_publisher'
+require_relative 'gingr/import_util'
+require_relative 'gingr/solr_indexer'
+
+module Gingr
+  #
+end
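With the umbrella file, a consumer (or an IRB session) needs a single require, and the monkey-patch is guaranteed to load before any class that touches `Geoserver::Publish`. A sketch of typical library-style use; the URLs here are placeholders:

```ruby
# Sketch: load the whole library with one require (run from the repo root).
$LOAD_PATH.unshift File.expand_path('lib', __dir__)
require 'gingr' # loads the monkey-patch first, then every Gingr class

# The classes are then usable without going through the CLI:
indexer   = Gingr::SolrIndexer.new('http://solr:8983/solr/geodata-test')
publisher = Gingr::GeoserverPublisher.new('http://admin:geoserver@localhost:8080/geoserver')
```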
diff --git a/lib/gingr/cli.rb b/lib/gingr/cli.rb
new file mode 100644
index 0000000..5ca6260
--- /dev/null
+++ b/lib/gingr/cli.rb
@@ -0,0 +1,108 @@
+# frozen_string_literal: true
+require 'thor'
+require_relative 'config'
+require_relative 'import_util'
+
+module Gingr
+  class Cli < Thor
+    include Config
+    include ImportUtil
+
+    Thor.check_unknown_options!
+
+    desc 'solr',
+         'Given a directory path, index all JSON files from the directory and its subdirectories to Solr'
+    long_desc <<-TEXT, wrapping: false
+      examples:\n
+      1) ruby bin/gingr solr tmp/test_public \n
+      2) ruby bin/gingr solr tmp/test_public --update_reference_field \n
+      (updates the reference URLs from the 'dct_references_s' field in each GeoBlacklight JSON file \n
+      with the current download_url, geoserver_url and geoserver_secure_url)
+    TEXT
+    option :download_url
+    option :geoserver_url
+    option :geoserver_secure_url
+    option :update_reference_field, type: :boolean, default: false
+    option :solr_url
+    def solr(dir_path)
+      reference_urls = ImportUtil.get_reference_urls(options)
+      solr_url = options[:solr_url] || ENV.fetch('SOLR_URL', nil)
+      ImportUtil.index_solr_from_dir(dir_path, solr_url, reference_urls)
+      txt = "all json files under '#{dir_path}' and subdirectories have been indexed to solr #{solr_url} successfully"
+      Config.logger.info(txt)
+    end
+
+    desc 'geoserver', 'publish a given shapefile or GeoTIFF file to GeoServer'
+    long_desc <<-TEXT, wrapping: false
+      examples: \n
+      1) ruby bin/gingr geoserver fk4cr7f93g.shp \n
+      2) ruby bin/gingr geoserver fk4h14n50v.shp --no-is-public
+    TEXT
+    option :geoserver_url
+    option :is_public, type: :boolean, default: true
+    def geoserver(filename)
+      url = options[:geoserver_url]
+      url ||= options[:is_public] ? ENV.fetch('GEOSERVER_URL', nil) : ENV.fetch('GEOSERVER_SECURE_URL', nil)
+      publisher = GeoserverPublisher.new(url)
+      publisher.update(filename)
+      Config.logger.info("'#{filename}' - published to geoserver #{url} successfully")
+    end
+
+    desc 'unpack',
+         'unpack a given zip file, moving shapefiles and GeoTIFF files to geoserver_root and other files to spatial_root'
+    long_desc <<-TEXT, wrapping: false
+      * When given a zip file without a path, it looks for the zip file under /app/import/
+    TEXT
+    option :spatial_root
+    option :geoserver_root
+    def unpack(zipfile)
+      zipfile_path = zipfile == File.basename(zipfile) ? File.join(ImportUtil.root_path, 'import', zipfile) : zipfile
+      DataHandler.spatial_root = options[:spatial_root] || ENV.fetch('SPATIAL_ROOT', nil)
+      DataHandler.geoserver_root = options[:geoserver_root] || ENV.fetch('GEOSERVER_ROOT', nil)
+
+      temp_path = File.join(Dir.pwd, 'tmp')
+      DataHandler.extract_and_move(zipfile_path, temp_path)
+    end
+
+    desc 'all',
+         'unpack a given zip file, move files, index JSON files to Solr and publish geofiles to GeoServer'
+    long_desc <<-TEXT, wrapping: false
+      1) move all geofiles to geoserver_root \n
+      2) move all data.zip, ISO19139.xml and document files to spatial_root \n
+      3) index all geoblacklight json files to solr \n
+      4) publish all shapefiles and GeoTIFF files to geoserver \n
+    TEXT
+    option :solr_url
+    option :update_reference_field, type: :boolean, default: false
+    option :spatial_root
+    option :geoserver_root
+    option :geoserver_url
+    option :geoserver_secure_url
+    def all(zipfile)
+      unpacked = unpack(zipfile)
+      solr(unpacked[:extract_to_path])
+
+      geofile_names = unpacked[:geofile_name_hash]
+      ImportUtil.publish_geoservers(geofile_names, options)
+      Config.logger.info("#{zipfile} - all imported")
+    end
+
+    desc 'geoserver_workspace', 'create a workspace in a geoserver'
+    long_desc <<-LONGDESC
+      This is for spec tests. The Geodata website only needs one workspace, "UCB".
+    LONGDESC
+    option :geoserver_url
+    option :is_public, type: :boolean, default: true
+    def geoserver_workspace(name)
+      url = options[:geoserver_url]
+      url ||= options[:is_public] ? ENV.fetch('GEOSERVER_URL', nil) : ENV.fetch('GEOSERVER_SECURE_URL', nil)
+      publisher = GeoserverPublisher.new(url)
+      publisher.create_workspace(name)
+      Config.logger.info("geoserver workspace '#{name}' - created successfully")
+    end
+
+    def self.exit_on_failure?
+      true
+    end
+  end
+end
diff --git a/lib/config.rb b/lib/gingr/config.rb
similarity index 99%
rename from lib/config.rb
rename to lib/gingr/config.rb
index a71df71..f3ab1c2 100644
--- a/lib/config.rb
+++ b/lib/gingr/config.rb
@@ -1,5 +1,4 @@
 # frozen_string_literal: true
-
 require 'berkeley_library/logging'
 
 # Gingr
diff --git a/lib/data_handler.rb b/lib/gingr/data_handler.rb
similarity index 98%
rename from lib/data_handler.rb
rename to lib/gingr/data_handler.rb
index 9516e67..3498fa8 100644
--- a/lib/data_handler.rb
+++ b/lib/gingr/data_handler.rb
@@ -1,13 +1,10 @@
 # frozen_string_literal: true
-
-require 'zip'
+require 'fileutils'
 require 'pathname'
+require 'zip'
 require_relative 'config'
-require 'fileutils'
 
-# Ginger module
 module Gingr
-  # handle ingestion data
   module DataHandler
     include Gingr::Config
 
diff --git a/lib/geoserver_publisher.rb b/lib/gingr/geoserver_publisher.rb
similarity index 93%
rename from lib/geoserver_publisher.rb
rename to lib/gingr/geoserver_publisher.rb
index 185d698..c4cc199 100644
--- a/lib/geoserver_publisher.rb
+++ b/lib/gingr/geoserver_publisher.rb
@@ -1,14 +1,12 @@
 # frozen_string_literal: true
-
-require_relative 'publish'
+require_relative '../monkeypatch/geoserver/publish'
 require 'uri'
 require_relative 'config'
 
-# Ginger module
 module Gingr
-  include Gingr::Config
-  # publish services to geoserver
   class GeoserverPublisher
+    include Gingr::Config
+
     def initialize(url)
       uri = URI(url)
       @conn = Geoserver::Publish::Connection.new({ 'url' => rest_url(uri), 'user' => uri.user,
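Moving `include Gingr::Config` inside the class (rather than the enclosing module) keeps the mixin scoped to the publisher itself. The constructor derives host, user and password from a single URL, so credentials travel in the URL rather than in separate settings. A usage sketch with a hypothetical host and credentials; `create_workspace` and `update` are the two calls the CLI makes:

```ruby
require_relative 'lib/gingr'

# Hypothetical GeoServer with basic auth embedded in the URL.
publisher = Gingr::GeoserverPublisher.new('http://admin:s3cret@geoserver.example.edu:8080/geoserver')
publisher.create_workspace('UCB')  # one-time workspace setup, as the specs do
publisher.update('fk4cr7f93g.shp') # publish the shapefile's layer
```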
diff --git a/lib/import_util.rb b/lib/gingr/import_util.rb
similarity index 98%
rename from lib/import_util.rb
rename to lib/gingr/import_util.rb
index acb9440..93aa1bb 100644
--- a/lib/import_util.rb
+++ b/lib/gingr/import_util.rb
@@ -1,15 +1,13 @@
 # frozen_string_literal: true
-
+require 'uri'
 require_relative 'config'
-require_relative 'solr_indexer'
 require_relative 'geoserver_publisher'
-
-require 'uri'
+require_relative 'solr_indexer'
 
 module Gingr
-  # util methods for bin/import
   module ImportUtil
     include Gingr::Config
+
     class << self
       def publish_geoservers(geofile_names, options)
         publish_geoserver_files(geofile_names[:public], options[:geoserver_url], true)
diff --git a/lib/solr_indexer.rb b/lib/gingr/solr_indexer.rb
similarity index 95%
rename from lib/solr_indexer.rb
rename to lib/gingr/solr_indexer.rb
index 11cf16a..55483c1 100644
--- a/lib/solr_indexer.rb
+++ b/lib/gingr/solr_indexer.rb
@@ -1,14 +1,12 @@
 # frozen_string_literal: true
-
-require 'rsolr'
 require 'faraday/net_http_persistent'
+require 'rsolr'
 require_relative 'config'
 
-# Ginger module
 module Gingr
-  include Gingr::Config
-  # index solr for Gingr
   class SolrIndexer
+    include Gingr::Config
+
     attr_reader :solr, :need_update_reference_urls
 
     def initialize(url, reference_urls = {})
diff --git a/lib/publish.rb b/lib/monkeypatch/geoserver/publish.rb
similarity index 63%
rename from lib/publish.rb
rename to lib/monkeypatch/geoserver/publish.rb
index 446d66e..4c7169d 100644
--- a/lib/publish.rb
+++ b/lib/monkeypatch/geoserver/publish.rb
@@ -1,26 +1,10 @@
 # frozen_string_literal: true
+require 'geoserver/publish'
 
-require 'erb'
-require 'faraday'
-require 'json'
-require 'yaml'
-
+# Monkey-patch geoserver-publish gem to prefix everything with berkeley_
+# @note Is this really necessary?
 module Geoserver
-  # from geoserver-publish gem: get a specific store name
   module Publish
-    require 'geoserver/publish/config'
-    require 'geoserver/publish/connection'
-    require 'geoserver/publish/coverage'
-    require 'geoserver/publish/coverage_store'
-    require 'geoserver/publish/create'
-    require 'geoserver/publish/data_store'
-    require 'geoserver/publish/feature_type'
-    require 'geoserver/publish/geowebcache'
-    require 'geoserver/publish/layer'
-    require 'geoserver/publish/style'
-    require 'geoserver/publish/version'
-    require 'geoserver/publish/workspace'
-
     def self.delete_geotiff(workspace_name:, id:, connection: nil)
       coverage_store_name = "berkeley_#{id}"
       CoverageStore.new(connection).delete(workspace_name:, coverage_store_name:)
@@ -47,12 +31,5 @@ def self.shapefile(workspace_name:, file_path:, id:, title: nil, connection: nil
       create_feature_type(workspace_name:, data_store_name:, feature_type_name: id, title:, connection:)
     end
-
-    def self.root
-      Pathname.new(File.expand_path('../..', __dir__))
-    end
-
-    class Error < StandardError
-    end
   end
 end
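The rewritten patch file leans on a standard Ruby technique: require the gem first, then reopen its module so only the overridden singleton methods change while everything else still resolves to the gem's own code. Distilled to its shape, using one method taken from the diff above:

```ruby
require 'geoserver/publish'

module Geoserver
  module Publish
    # Redefine just this method; CoverageStore and friends still come
    # from the geoserver-publish gem itself.
    def self.delete_geotiff(workspace_name:, id:, connection: nil)
      coverage_store_name = "berkeley_#{id}"
      CoverageStore.new(connection).delete(workspace_name:, coverage_store_name:)
    end
  end
end
```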
diff --git a/spec/import_util_spec.rb b/spec/import_util_spec.rb
index bd74fe1..7f87beb 100644
--- a/spec/import_util_spec.rb
+++ b/spec/import_util_spec.rb
@@ -1,7 +1,5 @@
 # frozen_string_literal: true
-
 require 'spec_helper'
-require_relative '../lib/import_util'
 
 RSpec.describe Gingr::ImportUtil do
   let(:reference_urls) do
diff --git a/spec/solr_indexer_spec.rb b/spec/solr_indexer_spec.rb
index 32262e6..d18d41d 100644
--- a/spec/solr_indexer_spec.rb
+++ b/spec/solr_indexer_spec.rb
@@ -1,7 +1,5 @@
 # frozen_string_literal: true
-
 require 'spec_helper'
-require_relative '../lib/solr_indexer'
 
 RSpec.describe Gingr::SolrIndexer do
   let(:url) { 'http://solr:8983/solr/geodata-test' }
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
index 4a323fa..6f14699 100644
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -1,96 +1,15 @@
-# This file was generated by the `rspec --init` command. Conventionally, all
-# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
-# The generated `.rspec` file contains `--require spec_helper` which will cause
-# this file to always be loaded, without a need to explicitly require it in any
-# files.
-#
-# Given that it is always loaded, you are encouraged to keep this file as
-# light-weight as possible. Requiring heavyweight dependencies from this file
-# will add to the boot time of your test suite on EVERY test run, even for an
-# individual file that may not need all of that loaded. Instead, consider making
-# a separate helper file that requires the additional dependencies and performs
-# the additional setup, and require it from the spec files that actually need
-# it.
-#
-# See https://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
+# frozen_string_literal: true
+$LOAD_PATH.unshift(File.expand_path('../lib', __FILE__))
+require 'gingr'
+
 RSpec.configure do |config|
-  # rspec-expectations config goes here. You can use an alternate
-  # assertion/expectation library such as wrong or the stdlib/minitest
-  # assertions if you prefer.
   config.expect_with :rspec do |expectations|
-    # This option will default to `true` in RSpec 4. It makes the `description`
-    # and `failure_message` of custom matchers include text for helper methods
-    # defined using `chain`, e.g.:
-    #     be_bigger_than(2).and_smaller_than(4).description
-    #     # => "be bigger than 2 and smaller than 4"
-    #     ...rather than:
-    #     # => "be bigger than 2"
     expectations.include_chain_clauses_in_custom_matcher_descriptions = true
   end
 
-  # rspec-mocks config goes here. You can use an alternate test double
-  # library (such as bogus or mocha) by changing the `mock_with` option here.
   config.mock_with :rspec do |mocks|
-    # Prevents you from mocking or stubbing a method that does not exist on
-    # a real object. This is generally recommended, and will default to
-    # `true` in RSpec 4.
     mocks.verify_partial_doubles = true
  end
 
-  # This option will default to `:apply_to_host_groups` in RSpec 4 (and will
-  # have no way to turn it off -- the option exists only for backwards
-  # compatibility in RSpec 3). It causes shared context metadata to be
-  # inherited by the metadata hash of host groups and examples, rather than
-  # triggering implicit auto-inclusion in groups with matching metadata.
   config.shared_context_metadata_behavior = :apply_to_host_groups
-
-  # The settings below are suggested to provide a good initial experience
-  # with RSpec, but feel free to customize to your heart's content.
-  # # This allows you to limit a spec run to individual examples or groups
-  # # you care about by tagging them with `:focus` metadata. When nothing
-  # # is tagged with `:focus`, all examples get run. RSpec also provides
-  # # aliases for `it`, `describe`, and `context` that include `:focus`
-  # # metadata: `fit`, `fdescribe` and `fcontext`, respectively.
-  # config.filter_run_when_matching :focus
-  #
-  # # Allows RSpec to persist some state between runs in order to support
-  # # the `--only-failures` and `--next-failure` CLI options. We recommend
-  # # you configure your source control system to ignore this file.
-  # config.example_status_persistence_file_path = "spec/examples.txt"
-  #
-  # # Limits the available syntax to the non-monkey patched syntax that is
-  # # recommended. For more details, see:
-  # # https://rspec.info/features/3-12/rspec-core/configuration/zero-monkey-patching-mode/
-  # config.disable_monkey_patching!
-  #
-  # # This setting enables warnings. It's recommended, but in some cases may
-  # # be too noisy due to issues in dependencies.
-  # config.warnings = true
-  #
-  # # Many RSpec users commonly either run the entire suite or an individual
-  # # file, and it's useful to allow more verbose output when running an
-  # # individual spec file.
-  # if config.files_to_run.one?
-  #   # Use the documentation formatter for detailed output,
-  #   # unless a formatter has already been configured
-  #   # (e.g. via a command-line flag).
-  #   config.default_formatter = "doc"
-  # end
-  #
-  # # Print the 10 slowest examples and example groups at the
-  # # end of the spec run, to help surface which specs are running
-  # # particularly slow.
-  # config.profile_examples = 10
-  #
-  # # Run specs in random order to surface order dependencies. If you find an
-  # # order dependency and want to debug it, you can fix the order by providing
-  # # the seed, which is printed after each run.
-  # #     --seed 1234
-  # config.order = :random
-  #
-  # # Seed global randomization in this process using the `--seed` CLI option.
-  # # Setting this allows you to use `--seed` to deterministically reproduce
-  # # test failures related to randomization by passing the same `--seed` value
-  # # as the one that triggered the failure.
-  # Kernel.srand config.seed
 end
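With `lib` on the load path and `gingr` required up front, spec files no longer need per-file `require_relative` lines; a new spec only pulls in the helper. A minimal sketch of what that looks like (a hypothetical spec, not part of the suite):

```ruby
# frozen_string_literal: true
require 'spec_helper' # loads the whole Gingr library via lib/gingr.rb

RSpec.describe Gingr::GeoserverPublisher do
  it 'can be constructed from a URL with embedded credentials' do
    publisher = described_class.new('http://user:pass@example.org:8080/geoserver')
    expect(publisher).to be_a(described_class)
  end
end
```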