Skip to content

Commit

Permalink
DP-827: Adds the watcher CLI command
Browse files Browse the repository at this point in the history
  • Loading branch information
danschmidt5189 committed Nov 15, 2023
1 parent 8a253c0 commit 6537dcf
Show file tree
Hide file tree
Showing 21 changed files with 305 additions and 60 deletions.
9 changes: 8 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
/log/*
/zold_app/*
/data/*
/tmp/*
/import/*
.irb_history
.bash_history
.bundle/*
./local/*

/data/*
!/data/gingr
/data/gingr/ready/*
/data/gingr/processed/*
/data/gingr/failed/*

/solr/geodata-test/data
/solr/geodata-test/data
/solr/geodata/data
/solr/geodata/data

!**/.keep
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@ gem 'uri'
group :test do
gem 'rspec', '~> 3.12'
end

gem "listen", "~> 3.8"
11 changes: 11 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ GEM
faraday-net_http_persistent (2.1.0)
faraday (~> 2.5)
net-http-persistent (~> 4.0)
ffi (1.16.3)
geo_combine (0.8.0)
activesupport
faraday-net_http_persistent (~> 2.0)
Expand All @@ -76,6 +77,9 @@ GEM
reline (>= 0.3.8)
json-schema (4.1.1)
addressable (>= 2.8)
listen (3.8.0)
rb-fsevent (~> 0.10, >= 0.10.3)
rb-inotify (~> 0.9, >= 0.9.10)
lograge (0.14.0)
actionpack (>= 4)
activesupport (>= 4)
Expand All @@ -88,6 +92,8 @@ GEM
mutex_m (0.1.2)
net-http-persistent (4.0.2)
connection_pool (~> 2.2)
nokogiri (1.15.4-aarch64-linux)
racc (~> 1.4)
nokogiri (1.15.4-x86_64-darwin)
racc (~> 1.4)
nokogiri (1.15.4-x86_64-linux)
Expand Down Expand Up @@ -123,6 +129,9 @@ GEM
thor (~> 1.0, >= 1.2.2)
zeitwerk (~> 2.6)
rake (13.1.0)
rb-fsevent (0.11.2)
rb-inotify (0.10.1)
ffi (~> 1.0)
rchardet (1.8.0)
rdoc (6.5.0)
psych (>= 4.0.0)
Expand Down Expand Up @@ -160,6 +169,7 @@ GEM
zeitwerk (2.6.12)

PLATFORMS
aarch64-linux
x86_64-darwin-22
x86_64-linux

Expand All @@ -168,6 +178,7 @@ DEPENDENCIES
faraday-net_http_persistent (~> 2.0)
geo_combine
geoserver-publish (~> 0.7.0)
listen (~> 3.8)
rsolr
rspec (~> 3.12)
rubyzip
Expand Down
20 changes: 18 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

Command-line tool for ingesting GeoData (Solr, GeoServer, and file servers).

## Quick Start
## Quick start

The app is Dockerized with an image designed to just run the `gingr` executable:

Expand All @@ -23,5 +23,21 @@ docker compose exec app bash
# You can also run commands from your host, either via `run` or `exec`:
docker compose exec app gingr help
docker compose run --rm app gingr help
docker compose run --rm app gingr all /opt/app/spec/fixture/zipfile/test_public.zip
docker compose run --rm app gingr all spec/fixture/zipfile/vector.zip
```

## Watching a directory for new files

Gingr's `watch` command monitors a directory for new `.zip` files and automatically ingest them. It accepts the path to a gingr root directory (which should contain `ready`, `processed`, and `failed` subdirectories) and the same arguments as `gingr all`:

```
gingr watch /path/to/directory [args to gingr all]
```

Gingr will error if the directory doesn't exist or if it doesn't contain the expected subdirectories:

```
./ready
./processed
./failed
```
Empty file added data/.keep
Empty file.
Binary file added data/gingr/.DS_Store
Binary file not shown.
Empty file added data/gingr/.keep
Empty file.
Empty file added data/gingr/failed/.keep
Empty file.
Empty file added data/gingr/processed/.keep
Empty file.
Empty file added data/gingr/ready/.keep
Empty file.
17 changes: 2 additions & 15 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,6 @@ services:
- tail
- -f
- /dev/null
develop:
watch:
- path: .
target: /opt/app
ignore:
- .ruby-version
- Dockerfile
- Gemfile*
action: sync
- path: .ruby-version
action: rebuild
- path: Dockerfile
action: rebuild
- path: Gemfile*
action: rebuild
environment:
DOWNLOAD_URL: https://spatial.lib.berkeley.edu/
GEOSERVER_ROOT: data/geoserver/
Expand All @@ -32,6 +17,8 @@ services:
SOLR_URL: http://solr:8983/solr/geodata-test
SPATIAL_ROOT: data/spatial/
restart: no
volumes:
- ./:/opt/app:delegated

solr:
command: >
Expand Down
4 changes: 1 addition & 3 deletions lib/gingr.rb
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
# frozen_string_literal: true

# monkey-patch first
require_relative 'monkeypatch/geoserver/publish'

require_relative 'gingr/cli'
require_relative 'gingr/config'
require_relative 'gingr/data_handler'
require_relative 'gingr/geoserver_publisher'
require_relative 'gingr/import_util'
require_relative 'gingr/solr_indexer'
require_relative 'gingr/watcher'

module Gingr
#
Expand Down
18 changes: 18 additions & 0 deletions lib/gingr/cli.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
require 'thor'
require_relative 'config'
require_relative 'import_util'
require_relative 'watcher'

module Gingr
class Cli < Thor
Expand All @@ -10,6 +11,23 @@ class Cli < Thor

Thor.check_unknown_options!

desc 'watch', 'Watches a Gingr directory for files ready to be processed'
long_desc <<-TEXT, wrapping: false
EXAMPLES
gingr watch data/gingr --solr-url=https://foo:[email protected]:8983/solr/geodata ...
TEXT
option :solr_url
option :update_reference_field, type: :boolean, default: false
option :spatial_root
option :geoserver_root
option :geoserver_url
option :geoserver_secure_url
def watch(root_dir = nil)
root_dir ||= ENV['GINGR_WATCH_DIRECTORY'] || '/opt/app/data/gingr'
watcher = Gingr::Watcher.new(root_dir, *options)
watcher.start!
end

desc 'solr',
'Giving a directory path, it will index all json files from the directory/sub-directory to solr'
long_desc <<-TEXT, wrapping: false
Expand Down
6 changes: 3 additions & 3 deletions lib/gingr/geoserver_publisher.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# frozen_string_literal: true
require_relative '../monkeypatch/geoserver/publish'
require 'geoserver/publish'
require 'uri'
require_relative 'config'

Expand All @@ -17,8 +17,8 @@ def update(filename)
name = File.basename(filename, '.*')
filepath = "file:///srv/geofiles/berkeley-#{name}/#{filename}"
File.extname(filename).downcase == '.shp' ? publish_shapefile(filepath, name) : pulsih_geotiff(filepath, name)
rescue Geoserver::Publish::Error
Config.logger.error("Publish Geoserver error: #{filename}")
rescue Geoserver::Publish::Error => e
Config.logger.error("Publish Geoserver error: #{filename} -- #{e.inspect}")
raise
end

Expand Down
3 changes: 2 additions & 1 deletion lib/gingr/import_util.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# frozen_string_literal: true
require 'find'
require 'uri'
require_relative 'config'
require_relative 'geoserver_publisher'
Expand Down Expand Up @@ -33,7 +34,7 @@ def get_reference_urls(options)

hash = {}
Config.reference_urls.each_key do |key|
url = options[key] || ENV.fetch(key.upcase)
url = options[key] || ENV.fetch(key.to_s.upcase)
hash[key] = reference_url(url) if url
end
hash
Expand Down
122 changes: 122 additions & 0 deletions lib/gingr/watcher.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# frozen_string_literal: true
require 'listen'
require 'open3'
require_relative 'config'

module Gingr
class Watcher
include Config

# Only watch for files in WATCHED_DIRECTORIES[:READY] that match this pattern
WATCH_FILTER = Regexp.compile(/\.zip$/)

WATCHED_DIRECTORIES = {
# Directory into which new files are dropped when ready for processing.
# .watch! monitors this for new .zip's.
READY: 'ready'.freeze,
# Directory into which successfully processed files are moved post-processing.
PROCESSED: 'processed'.freeze,
# Directory into which failed files are moved post-processing.
FAILED: 'failed'.freeze,
}

attr_reader :options
attr_reader :root_dir

def initialize(root_dir, *options)
# This is the Gingr root directory, not the directory to be watched.
# Watcher watches the ./ready directory under this one.
@root_dir = root_dir

# Options are passed as-is to `gingr all`, so they should match the
# arguments you'd otherwise pass to that command
@options = options

validate_directories!
end

def start!
start
sleep
end

def start
Config.logger.info("Monitoring directory for new zipfiles: #{ready_dir}")
listener.start unless listener.processing?
end

def listener
@listener ||= begin
Listen.to(ready_dir, only: WATCH_FILTER, force_polling: true) do |_, added, _|
added.each do |zipfile|
Config.logger.info("Processing zipfile: #{zipfile}")

begin
exec_gingr_all!(zipfile)
rescue => e
Config.logger.error("Error processing #{zipfile}, moving to #{failed_dir}: #{e.inspect}")
end
end
end
end
end

def exec_gingr_all!(zipfile)
begin
command = ['gingr', 'all', zipfile, *options]
Config.logger.debug("Running command: #{command}")

stdout, stderr, status = Open3.capture3(*command)
if !status.success?
raise SubprocessError, "Call to `gingr all` failed: #{status}"
end

Config.logger.debug("Processed #{zipfile}, moving to #{processed_dir}")
FileUtils.mv(zipfile, processed_dir)
rescue => e
FileUtils.mv(zipfile, failed_dir)
File.write(error_log_for(zipfile), collate_logs(stdout, stderr))
raise
end
end

def ready_dir
@ready_dir ||= File.join(@root_dir, WATCHED_DIRECTORIES[:READY])
end

def processed_dir
@processed_dir ||= File.join(@root_dir, WATCHED_DIRECTORIES[:PROCESSED])
end

def failed_dir
@failed_dir ||= File.join(@root_dir, WATCHED_DIRECTORIES[:FAILED])
end

private

def collate_logs(stdout, stderr)
"#{stdout}\n#{stderr}\n"
end

def error_log_for(zipfile)
File.join(failed_dir, "#{File.basename(zipfile, '.*')}.log")
end

def validate_directories!
WATCHED_DIRECTORIES.values
.collect { |dirname| public_send("#{dirname}_dir") }
.each &method(:validate_directory!)
end

def validate_directory!(directory)
if !File.writable?(directory)
raise DirectoryError, "Directory is not writable: #{directory}"
end
end

# Typed errors to help with tests / figuring out what exactly failed
class WatcherError < StandardError; end
class DirectoryError < WatcherError; end
class SubprocessError < WatcherError; end
end
end
35 changes: 0 additions & 35 deletions lib/monkeypatch/geoserver/publish.rb

This file was deleted.

Binary file added spec/fixture/.DS_Store
Binary file not shown.
Binary file added spec/fixture/zipfile/.DS_Store
Binary file not shown.
Loading

0 comments on commit 6537dcf

Please sign in to comment.