diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 00000000..212d5dcd --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,85 @@ +# Workflow for deploying ontologies_api to stage/prod systems via capistrano. +# This workflow runs after a successful execution of the unit test workflow and it +# can also be triggered manually. +# +# Required github secrets: +# +# CONFIG_REPO - github repo containing config and customizations for the API. Format 'author/private_config_repo' +# it is used for getting capistrano deployment configuration for stages on the github actions runner and +# PRIVATE_CONFIG_REPO env var is constructed from it which is used by capistrano on the remote servers for pulling configs. +# +# GH_PAT - github Personal Access Token for accessing PRIVATE_CONFIG_REPO +# +# SSH_JUMPHOST - ssh jump/proxy host through which deployments have to go if app servers are hosted on a private network. +# +# DEPLOY_ENC_KEY - key for decrypting deployment ssh key residing in config/deploy_id_rsa_enc (see miloserdow/capistrano-deploy) +# this SSH key is used for accessing jump host, UI nodes, and private github repo. + +name: Capistrano Deployment +# Controls when the action will run. 
+on: + # Trigger deployment to staging after unit test action completes + workflow_run: + workflows: ["Ruby Unit Tests"] + types: + - completed + branches: [master, develop] + # Allows running this workflow manually from the Actions tab + workflow_dispatch: + branches: [master, develop] + inputs: + BRANCH: + description: 'Branch/tag to deploy' + default: develop + required: true + environment: + description: 'target environment to deploy to' + type: choice + options: + - staging + - production + default: staging + +jobs: + deploy: + runs-on: ubuntu-latest + # run deployment only if "Ruby Unit Tests" workflow completes successfully or when manually triggered + if: ${{ (github.event.workflow_run.conclusion == 'success') || (github.event_name == 'workflow_dispatch') }} + env: + BUNDLE_WITHOUT: default #install gems required primarily for the deployment in order to speed up this workflow + PRIVATE_CONFIG_REPO: ${{ format('git@github.com:{0}.git', secrets.CONFIG_REPO) }} + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + - name: set branch/tag and environment to deploy from inputs + run: | + # workflow_dispatch default input doesn't get set on push so we need to set defaults + # via shell parameter expansion + # https://dev.to/mrmike/github-action-handling-input-default-value-5f2g + USER_INPUT_BRANCH=${{ inputs.branch }} + echo "BRANCH=${USER_INPUT_BRANCH:-develop}" >> $GITHUB_ENV + USER_INPUT_ENVIRONMENT=${{ inputs.environment }} + echo "TARGET=${USER_INPUT_ENVIRONMENT:-staging}" >> $GITHUB_ENV + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v3 + - uses: ruby/setup-ruby@v1 + with: + ruby-version: 2.7.6 # Not needed with a .ruby-version file + bundler-cache: true # runs 'bundle install' and caches installed gems automatically + - name: get-deployment-config + uses: actions/checkout@v3 + with: + repository: ${{ secrets.CONFIG_REPO }} # repository containing deployment 
settings + token: ${{ secrets.GH_PAT }} # `GH_PAT` is a secret that contains your PAT + path: deploy_config + - name: copy-deployment-config + run: cp -r deploy_config/ontologies_api/* . + # add ssh hostkey so that capistrano doesn't complain + - name: Add jumphost's hostkey to Known Hosts + run: | + mkdir -p ~/.ssh + ssh-keyscan -H ${{ secrets.SSH_JUMPHOST }} > ~/.ssh/known_hosts + shell: bash + - uses: miloserdow/capistrano-deploy@master + with: + target: ${{ env.TARGET }} # which environment to deploy + deploy_key: ${{ secrets.DEPLOY_ENC_KEY }} # Name of the variable configured in Settings/Secrets of your github project diff --git a/.github/workflows/ruby-unit-tests.yml b/.github/workflows/ruby-unit-tests.yml index 76a3cc01..6b2c973d 100644 --- a/.github/workflows/ruby-unit-tests.yml +++ b/.github/workflows/ruby-unit-tests.yml @@ -11,7 +11,7 @@ jobs: backend: ['api'] # api runs tests with 4store backend and api-agraph runs with AllegroGraph backend runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Build docker-compose run: docker-compose --profile 4store build #profile flag is set in order to build all containers in this step - name: Run unit tests diff --git a/Gemfile b/Gemfile index 209034b3..0c1b4223 100644 --- a/Gemfile +++ b/Gemfile @@ -38,7 +38,7 @@ gem 'unicorn' gem 'unicorn-worker-killer' # Templating -gem 'haml', '~> 5.2.2' +gem 'haml', '~> 5.2.2' # pinned; see https://github.com/ncbo/ontologies_api/pull/107 gem 'redcarpet' # NCBO gems (can be from a local dev path or from rubygems/git) @@ -50,10 +50,13 @@ gem 'sparql-client', github: 'ontoportal-lirmm/sparql-client', branch: 'master' gem 'ontologies_linked_data', git: 'https://github.com/ontoportal-lirmm/ontologies_linked_data.git', branch: 'development' group :development do + # bcrypt_pbkdf and ed25519 are required for capistrano deployments when using ed25519 keys; see https://github.com/miloserdow/capistrano-deploy/issues/42 + gem 'bcrypt_pbkdf', '>= 1.0', 
'< 2.0', require: false gem 'capistrano', '~> 3', require: false gem 'capistrano-bundler', require: false gem 'capistrano-locally', require: false gem 'capistrano-rbenv', require: false + gem 'ed25519', '>= 1.2', '< 2.0', require: false gem 'pry' gem 'shotgun', github: 'palexander/shotgun', branch: 'ncbo' end diff --git a/Gemfile.lock b/Gemfile.lock index f68658b6..1cec69a9 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -11,18 +11,17 @@ GIT GIT remote: https://github.com/ontoportal-lirmm/goo.git - revision: 295d794acb6b685be5560b61554c8e8ab6f98fa9 + revision: 3d5bbe1db4a6aca2ff621ccfcdb85a32dbe9704e branch: development specs: goo (0.0.2) - addressable (= 2.3.5) + addressable (~> 2.8) pry rdf (= 1.0.8) redis rest-client rsolr sparql-client - systemu uuid GIT @@ -38,7 +37,7 @@ GIT GIT remote: https://github.com/ontoportal-lirmm/ncbo_cron.git - revision: 8c1d82a24005fca1ebd661cf0fbad581ecc50fdf + revision: c8e268c83c8fa9464518eccca5bcebcff848bcf2 branch: development specs: ncbo_cron (0.0.1) @@ -54,7 +53,7 @@ GIT GIT remote: https://github.com/ontoportal-lirmm/ontologies_linked_data.git - revision: b5678553c42ff0b58080cd6de1607990c8b28881 + revision: 5422b84d390b1f3ff387ce3ed62dc6331698dd50 branch: development specs: ontologies_linked_data (0.0.1) @@ -104,11 +103,13 @@ GEM activesupport (3.2.22.5) i18n (~> 0.6, >= 0.6.4) multi_json (~> 1.0) - addressable (2.3.5) + addressable (2.8.1) + public_suffix (>= 2.0.2, < 6.0) airbrussh (1.4.1) sshkit (>= 1.6.1, != 1.7.0) backports (3.23.0) bcrypt (3.1.18) + bcrypt_pbkdf (1.1.0) bigdecimal (1.4.2) builder (3.2.4) capistrano (3.17.1) @@ -127,10 +128,12 @@ GEM concurrent-ruby (1.1.10) cube-ruby (0.0.3) dante (0.2.0) + date (3.3.2) declarative (0.0.20) docile (1.4.0) domain_name (0.5.20190701) unf (>= 0.0.5, < 1.0.0) + ed25519 (1.3.0) faraday (1.10.2) faraday-em_http (~> 1.0) faraday-em_synchrony (~> 1.0) @@ -157,15 +160,15 @@ GEM ffi (1.15.5) get_process_mem (0.2.7) ffi (~> 1.0) - google-api-client (0.10.3) - addressable (~> 2.3) 
- googleauth (~> 0.5) - httpclient (~> 2.7) - hurley (~> 0.1) - memoist (~> 0.11) - mime-types (>= 1.6) + google-api-client (0.52.0) + addressable (~> 2.5, >= 2.5.1) + googleauth (~> 0.9) + httpclient (>= 2.8.1, < 3.0) + mini_mime (~> 1.0) representable (~> 3.0) retriable (>= 2.0, < 4.0) + rexml + signet (~> 0.12) googleauth (0.17.1) faraday (>= 0.17.3, < 2.0) jwt (>= 1.4, < 3.0) @@ -180,21 +183,23 @@ GEM http-cookie (1.0.5) domain_name (~> 0.5) httpclient (2.8.3) - hurley (0.2) i18n (0.9.5) concurrent-ruby (~> 1.0) - json (2.6.2) - json-schema (2.5.0) - addressable (~> 2.3) - json_pure (2.6.2) + json (2.6.3) + json-schema (2.8.1) + addressable (>= 2.4) + json_pure (2.6.3) jwt (2.5.0) kgio (2.11.4) libxml-ruby (3.2.4) - logger (1.5.1) + logger (1.5.3) macaddr (1.7.2) systemu (~> 2.6.5) - mail (2.7.1) + mail (2.8.0) mini_mime (>= 0.1.1) + net-imap + net-pop + net-smtp memoist (0.16.2) method_source (1.0.0) mime-types (3.4.1) @@ -208,11 +213,20 @@ GEM multi_json (1.15.0) multipart-post (2.2.3) net-http-persistent (2.9.4) + net-imap (0.3.2) + date + net-protocol + net-pop (0.1.2) + net-protocol + net-protocol (0.2.1) + timeout net-scp (4.0.0) net-ssh (>= 2.6.5, < 8.0.0) + net-smtp (0.3.3) + net-protocol net-ssh (7.0.1) netrc (0.11.0) - newrelic_rpm (8.13.1) + newrelic_rpm (8.14.0) oj (2.18.5) omni_logger (0.1.4) logger @@ -223,6 +237,7 @@ GEM pry (0.14.1) coderay (~> 1.1) method_source (~> 1.0) + public_suffix (5.0.1) rack (1.6.13) rack-accept (0.4.5) rack (>= 0.4) @@ -272,9 +287,9 @@ GEM rubyzip (2.3.2) rufus-scheduler (2.0.24) tzinfo (>= 0.3.22) - signet (0.15.0) - addressable (~> 2.3) - faraday (>= 0.17.3, < 2.0) + signet (0.17.0) + addressable (~> 2.8) + faraday (>= 0.17.5, < 3.a) jwt (>= 1.5, < 3.0) multi_json (~> 1.10) simplecov (0.21.2) @@ -305,6 +320,7 @@ GEM systemu (2.6.5) temple (0.9.1) tilt (2.0.11) + timeout (0.3.1) trailblazer-option (0.1.2) tzinfo (2.0.5) concurrent-ruby (~> 1.0) @@ -323,16 +339,17 @@ GEM PLATFORMS x86_64-darwin-21 - x86_64-linux 
DEPENDENCIES activesupport (~> 3.0) + bcrypt_pbkdf (>= 1.0, < 2.0) bigdecimal (= 1.4.2) capistrano (~> 3) capistrano-bundler capistrano-locally capistrano-rbenv cube-ruby + ed25519 (>= 1.2, < 2.0) faraday (~> 1.9) ffi goo! @@ -374,4 +391,4 @@ DEPENDENCIES unicorn-worker-killer BUNDLED WITH - 2.3.15 + 2.3.23 diff --git a/config/deploy.rb b/config/deploy.rb index 441be857..23a982cd 100644 --- a/config/deploy.rb +++ b/config/deploy.rb @@ -1,6 +1,6 @@ # config valid only for Capistrano 3 -APP_PATH = '/srv/ncbo' +APP_PATH = '/srv/ontoportal' set :application, 'ontologies_api' set :repo_url, "https://github.com/ncbo/#{fetch(:application)}.git" @@ -77,7 +77,7 @@ namespace :deploy do - desc 'Incorporate the bioportal_conf private repository content' + desc 'Incorporate the private repository content' # Get cofiguration from repo if PRIVATE_CONFIG_REPO env var is set # or get config from local directory if LOCAL_CONFIG_PATH env var is set task :get_config do diff --git a/config/rack_attack.rb b/config/rack_attack.rb index 7256f1c7..60d2e3de 100644 --- a/config/rack_attack.rb +++ b/config/rack_attack.rb @@ -20,24 +20,32 @@ safe_accounts = LinkedData::OntologiesAPI.settings.safe_accounts ||= Set.new(%w[ncbobioportal ontoportal_ui biomixer]) -Rack::Attack.safelist('mark safe accounts such as ontoportal_ui and biomixer as safe') do |req| - req.env['REMOTE_USER'] && safe_accounts.include?(req.env['REMOTE_USER'].username) +Rack::Attack.safelist('mark safe accounts such as ontoportal_ui and biomixer as safe') do |request| + request.env['REMOTE_USER'] && safe_accounts.include?(request.env['REMOTE_USER'].username) end -Rack::Attack.safelist('mark administrators as safe') do |req| - req.env['REMOTE_USER']&.admin? +Rack::Attack.safelist('mark administrators as safe') do |request| + request.env['REMOTE_USER']&.admin? 
end -Rack::Attack.throttle('req/ip', limit: LinkedData::OntologiesAPI.settings.req_per_second_per_ip, - period: 1.second, &:ip) +Rack::Attack.throttle('requests by ip', + limit: LinkedData::OntologiesAPI.settings.req_per_second_per_ip, + period: 1.second +) do |request| + request.ip +end + +Rack::Attack.throttled_responder = lambda do |request| + match_data = request.env['rack.attack.match_data'] -Rack::Attack.throttled_response = lambda do |env| - match_data = env['rack.attack.match_data'] headers = { 'RateLimit-Limit' => match_data[:limit].to_s, 'RateLimit-Remaining' => '0', 'RateLimit-Reset' => match_data[:period].to_s } - body = "You have made #{match_data[:count]} requests in the last #{match_data[:period]} seconds. For user #{env['REMOTE_USER']}, we limit API Keys to #{match_data[:limit]} requests every #{match_data[:period]} seconds" + + body = "You have made #{match_data[:count]} requests in the last #{match_data[:period]} seconds. + For user #{request.env['REMOTE_USER']}, we limit API Keys to #{match_data[:limit]} requests every #{match_data[:period]} seconds\n" + [429, headers, [body]] end diff --git a/controllers/ontology_analytics_controller.rb b/controllers/ontology_analytics_controller.rb index 5113c926..8ecd77d5 100644 --- a/controllers/ontology_analytics_controller.rb +++ b/controllers/ontology_analytics_controller.rb @@ -7,6 +7,7 @@ class OntologyAnalyticsController < ApplicationController namespace "/analytics" do get do + expires 86400, :public year = year_param(params) error 400, "The year you supplied is invalid. Valid years start with 2 and contain 4 digits." if params["year"] && !year month = month_param(params) @@ -24,6 +25,7 @@ class OntologyAnalyticsController < ApplicationController namespace "/ontologies/:acronym/analytics" do get do + expires 86400, :public ont = Ontology.find(params["acronym"]).first error 404, "No ontology exists with the acronym: #{params["acronym"]}" if ont.nil? 
analytics = ont.analytics diff --git a/controllers/ontology_submissions_controller.rb b/controllers/ontology_submissions_controller.rb index 048fbbef..cf55659d 100644 --- a/controllers/ontology_submissions_controller.rb +++ b/controllers/ontology_submissions_controller.rb @@ -104,15 +104,16 @@ class OntologySubmissionsController < ApplicationController submission_attributes = [:submissionId, :submissionStatus, :uploadFilePath, :pullLocation] included = Ontology.goo_attrs_to_load.concat([submissions: submission_attributes]) ont = Ontology.find(acronym).include(included).first - ont.bring(:viewingRestriction) if ont.bring?(:viewingRestriction) error 422, "You must provide an existing `acronym` to download" if ont.nil? + ont.bring(:viewingRestriction) if ont.bring?(:viewingRestriction) check_access(ont) ont_restrict_downloads = LinkedData::OntologiesAPI.settings.restrict_download error 403, "License restrictions on download for #{acronym}" if ont_restrict_downloads.include? acronym submission = ont.submission(params['ontology_submission_id'].to_i) error 404, "There is no such submission for download" if submission.nil? file_path = submission.uploadFilePath - + # handle edge case where uploadFilePath is not set + error 422, "Upload File Path is not set for this submission" if file_path.to_s.empty? download_format = params["download_format"].to_s.downcase allowed_formats = ["csv", "rdf"] if download_format.empty? 
diff --git a/test/controllers/test_ontology_submissions_controller.rb b/test/controllers/test_ontology_submissions_controller.rb index 0098969e..7500dce4 100644 --- a/test/controllers/test_ontology_submissions_controller.rb +++ b/test/controllers/test_ontology_submissions_controller.rb @@ -125,6 +125,10 @@ def test_download_submission # Clear restrictions on downloads LinkedData::OntologiesAPI.settings.restrict_download = [] # see also test_ontologies_controller::test_download_ontology + + # Test downloads of nonexistent ontology + get "/ontologies/BOGUS66/submissions/55/download" + assert_equal(422, last_response.status, "failed to handle downloads of nonexistent ontology" + get_errors(last_response)) end def test_download_ontology_submission_rdf