Skip to content

Commit

Permalink
Merge pull request #4398 from galaxyproject/buffalo-mooneye
Browse files Browse the repository at this point in the history
Add workflowhub to cross .* workflow list page
  • Loading branch information
nomadscientist authored Nov 15, 2023
2 parents 98ea497 + e037e8b commit eb5dfbf
Show file tree
Hide file tree
Showing 4 changed files with 22,496 additions and 5 deletions.
53 changes: 53 additions & 0 deletions .github/workflows/workflow-list.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
name: "[Cron] Update Workflow List"
on:
  workflow_dispatch:
  schedule:
    # Weekly: Tuesdays at 00:45 UTC.
    - cron: '45 0 * * 2'
jobs:
  runner-job:
    runs-on: ubuntu-latest
    # Only run on main repo on and PRs that match the main repo.
    if: |
      github.repository == 'galaxyproject/training-material' &&
      (github.event_name != 'pull_request' ||
      github.event.pull_request.head.repo.full_name == github.repository)
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 10

      # BEGIN Dependencies
      - uses: ruby/setup-ruby@v1
        with:
          ruby-version: "3.0"
      - uses: actions/cache@v2
        with:
          path: |
            vendor/bundle
          key: ${{ runner.os }}-gems-${{ hashFiles('**/Gemfile.lock') }}
          restore-keys: |
            ${{ runner.os }}-gems-
      - name: Install dependencies
        run: |
          gem install bundler
          bundle config path vendor/bundle
          bundle install --jobs 4 --retry 3
          bundle pristine ffi
      # END Dependencies

      # Regenerate the workflow list; the script's stdout (new IDs) is
      # captured so the next step can decide whether a PR is needed.
      - name: Update Workflow List
        id: generate
        run: |
          echo "new_ids=$(ruby bin/workflows-fetch.rb)" >> $GITHUB_OUTPUT
      - name: Create Pull Request
        # If it's not a Pull Request then commit any changes as a new PR.
        if: |
          github.event_name != 'pull_request' &&
          steps.generate.outputs.new_ids != ''
        uses: peter-evans/create-pull-request@v3
        with:
          # NOTE(review): title/commit-message previously said "Update
          # Persistent uniform resource locators" / "Update PURLs" — copy-paste
          # residue from the shortlink workflow. This job updates the workflow
          # list, so name it accordingly.
          title: Update Workflow List
          branch-suffix: timestamp
          commit-message: Update workflow list
          add-paths: metadata/workflows.yaml
76 changes: 71 additions & 5 deletions bin/workflows-fetch.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,22 @@
require 'uri'
require 'yaml'

# Get the list of workflows
def fetch_workflows(server)
uri = URI.parse("#{server}/api/workflows/")
# Issue an HTTP GET for +url+ with an 'Accept: application/json' header and
# return the raw Net::HTTP response object. TLS is switched on automatically
# when the URL scheme is https.
def request(url)
  uri = URI.parse(url)
  get = Net::HTTP::Get.new(uri)
  get['Accept'] = 'application/json'
  Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
    http.request(get)
  end
end

# Get the list of workflows
def fetch_workflows(server)
response = request("#{server}/api/workflows/")

begin
JSON.parse(response.body).map do |w|
Expand All @@ -25,17 +31,77 @@ def fetch_workflows(server)
end
end

# Fetch every Galaxy-type workflow listed on WorkflowHub
# (https://workflowhub.eu) and normalise each into the same hash shape
# produced by fetch_workflows.
#
# Returns an Array of Hashes with the keys: 'name', 'owner',
# 'number_of_steps', 'server', 'id', 'tags', 'update_time'. Workflows whose
# detail record cannot be parsed (e.g. missing 'internals') are dropped.
#
# Exits the process with status 42 if the listing is paginated, since
# pagination is not implemented and a silently truncated list would be worse.
def fetch_workflowhub
  # Map project id -> project title so workflows can credit their projects.
  projects = JSON.parse(request('https://workflowhub.eu/projects').body)
  project_mapping = projects['data'].map { |p| [p['id'], p['attributes']['title']] }.to_h

  response = request('https://workflowhub.eu/workflows?filter[workflow_type]=galaxy')
  data = JSON.parse(response.body)
  if !data['links']['next'].nil?
    puts 'ERROR: Cannot yet handle multiple pages'
    exit 42
  end
  puts "INFO: Fetching #{data['data'].length} workflows from WorkflowHub"
  data['data'].map do |w|
    # Each listing entry looks like:
    # {"id"=>"14", "type"=>"workflows", "attributes"=>{"title"=>"Cheminformatics - Docking"}, "links"=>{"self"=>"/workflows/14"}}
    wf_info = JSON.parse(request("https://workflowhub.eu#{w['links']['self']}").body)
    creator_list = []

    creator0 = wf_info['data']['attributes']['creators'][0]
    if !creator0.nil?
      # Primary creator takes precedence. Interpolation (rather than String#+)
      # tolerates a nil given/family name instead of raising TypeError.
      creator_list.push("#{creator0['given_name']} #{creator0['family_name']}")
    else
      # Fall back to the free-text, comma-separated "other creators" field.
      other = wf_info['data']['attributes']['other_creators']
      creator_list.push(other.split(',').map(&:strip)) if !other.nil? && other.length.positive?
    end
    # Credit every associated project as well.
    wf_info['data']['relationships']['projects']['data'].each do |p|
      creator_list.push(project_mapping[p['id']])
    end

    creator_list = creator_list.flatten.compact.uniq

    begin
      {
        'name' => wf_info['data']['attributes']['title'],
        'owner' => creator_list.join(', '),
        'number_of_steps' => wf_info['data']['attributes']['internals']['steps'].length,
        'server' => 'https://workflowhub.eu',
        'id' => wf_info['data']['id'],
        # Tags arrive namespaced as "name:<tag>"; strip the prefix.
        'tags' => wf_info['data']['attributes']['tags'].map { |t| t.gsub(/^name:/, '') },
        'update_time' => wf_info['data']['attributes']['updated_at'],
      }
    rescue StandardError
      # Malformed/incomplete workflow records are skipped (compacted below).
      nil
    end
  end.compact
end


# Parse the response: fetch public workflows from the big UseGalaxy servers.
# (The diff previously fetched ORG and AUS twice, discarding the first
# result of each — one fetch + one log line per server is enough.)
workflows_eu = fetch_workflows('https://usegalaxy.eu')
puts "INFO: Fetched #{workflows_eu.length} workflows from EU"
workflows_org = fetch_workflows('https://usegalaxy.org')
puts "INFO: Fetched #{workflows_org.length} workflows from ORG"
workflows_aus = fetch_workflows('https://usegalaxy.org.au')
puts "INFO: Fetched #{workflows_aus.length} workflows from AUS"
workflows = workflows_eu + workflows_org + workflows_aus

# Cleanup the list: keep only workflows the public can actually see and use.
workflows.filter! do |w|
  w['published'] == true && w['importable'] == true && w['deleted'] == false && w['hidden'] == false
end

# Add in WFHub workflows (already filtered to Galaxy type server-side).
workflows += fetch_workflowhub()

# Group by name + owner
cleaned = workflows.group_by { |w| "#{w['name']}<WFID>#{w['owner']}" }
cleaned = cleaned.map do |_k, v|
Expand Down
Loading

0 comments on commit eb5dfbf

Please sign in to comment.