From 93a80b97e557553c241b44953b05449860db03c2 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 12:25:10 +0200 Subject: [PATCH 01/41] rename funders file to grants for clarity --- FUNDERS.yaml => GRANTS.yaml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename FUNDERS.yaml => GRANTS.yaml (100%) diff --git a/FUNDERS.yaml b/GRANTS.yaml similarity index 100% rename from FUNDERS.yaml rename to GRANTS.yaml From 53bd356bb5c82cc2367a4a7a5690fdb5d92b476e Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 12:30:08 +0200 Subject: [PATCH 02/41] update description --- GRANTS.yaml | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/GRANTS.yaml b/GRANTS.yaml index a39b2e6a50ca77..57b9dbe02c630a 100644 --- a/GRANTS.yaml +++ b/GRANTS.yaml @@ -1,24 +1,21 @@ -# List of funders related to contributors +# List of grants funding GTN activity # -# reference funders by their id in tutorial metadata files +# Can be added to contributions->funding metadata key of materials # # valid tags: -# name, email, linkedin, twitter, gitter, orcid, bio, joined, url, -# funder, funding_id, funding_system, funding_statement +# name, joined, url, github, funding_id, funding_system, funding_statement # -# collection names should be equal to github username, if not, add github: false tag --- -# funder1: -# name: Funder A +# Grant1: # should be equal to GitHub username if one exists for the grant/project +# name: Grant/Project name # joined: 2020-06 -# url: "https://example.com" -# funder: true +# url: "https://example.com" # homepage of the project # funding_id: 2020-1-NL01-KA203-064717 -# funding_system: erasmusplus +# funding_system: erasmusplus # database/page where your # funding_statement: "This project is funded with the support of .." 
+# github: false # add this if the id does not map to a GitHub account, -# our real funders <3 # please add in alphabetical order of the key (id) abromics: From c867df10465aef9fc174a8dbaf90f1235c46d39f Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 12:32:15 +0200 Subject: [PATCH 03/41] remove obsolete funder key --- GRANTS.yaml | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/GRANTS.yaml b/GRANTS.yaml index 57b9dbe02c630a..697ebcb1a21426 100644 --- a/GRANTS.yaml +++ b/GRANTS.yaml @@ -24,7 +24,6 @@ abromics: joined: 2024-01 avatar: "/training-material/assets/images/abromics.png" url: https://www.abromics.fr/ - funder: true ai4life: name: AI4Life @@ -32,7 +31,6 @@ ai4life: joined: 2023-07 avatar: "/training-material/topics/ai4life/images/AI4Life-logo_giraffe-solid.png" url: https://ai4life.eurobioimaging.eu/ - funder: true funding_id: "101057970" funding_system: cordis funding_statement: | @@ -44,7 +42,6 @@ biont: joined: 2023-09 avatar: "/training-material/assets/images/BioNT_Logo.png" url: https://biont-training.eu/ - funder: true funding_id: "101100604" funding_system: cordis funding_statement: | @@ -53,7 +50,6 @@ biont: by-covid: name: BeYond-COVID joined: 2023-04 - funder: true funding_id: "101046203" funding_system: cordis funding_statement: | @@ -67,7 +63,6 @@ CINECA-Project: name: Common Infrastructure for National Cohorts in Europe, Canada, and Africa short_name: "CINECA" joined: 2023-03 - funder: true funding_id: "825775" funding_system: cordis funder_name: Horizon 2020 @@ -78,11 +73,9 @@ nfdi4plants: joined: 2024-07 url: https://www.nfdi4plants.de://www.nfdi4plants.de/ avatar: "/training-material/shared/images/logo-dataplant.svg" - funder: true deNBI: name: de.NBI - funder: true url: https://www.denbi.de/ elixir-converge: @@ -91,7 +84,6 @@ elixir-converge: joined: 2023-01 avatar: "https://elixir-europe.org/sites/default/files/styles/right-medium/public/images/converge_logo.png" url: 
https://elixir-europe.org/about-us/how-funded/eu-projects/converge - funder: true funding_id: "871075" funding_system: cordis funding_statement: | @@ -104,7 +96,6 @@ edctp-eu: avatar: "/training-material/shared/images/edctp-eu.png" github: false url: https://www.edctp.org/ - funder: true funding_statement: | TB-CAPT, PanACEA and PANGenS are part of the EDCTP2 and EDCTP3 programs supported by the European Union. @@ -113,14 +104,12 @@ elixir-excelerate: url: https://www.elixir-europe.org/excelerate/ avatar: "/training-material/shared/images/Excelerate_whitebackground.png" github: false - funder: true elixir-fair-data: name: "ELIXIR-UK: FAIR Data Stewardship training" short_name: "ELIXIR Fair Data" github: false joined: 2023-06 - funder: true funding_id: MR/V038966/1 funding_system: ukri funding_statement: | @@ -133,7 +122,6 @@ elixir-uk-dash: github: false avatar: "/training-material/shared/images/dash.png" url: https://elixiruknode.org/projects/elixir-uk-dash/ - funder: true funding_system: ukri funding_id: MR/V038966/1 funding_statement: | @@ -144,7 +132,6 @@ eosc-life: github: false joined: 2023-04 avatar: "https://www.eosc-life.eu/wp-content/themes/eosc-life-v2/assets/images/eosclogo.png" - funder: true funding_id: "824087" funding_system: cordis funding_statement: | @@ -157,7 +144,6 @@ epsrc-training-grant: short_name: "EPSRC/OU" github: false joined: 2022-09 - funder: true funding_statement: The research internship was supported through EPSRC Training Grant DTP 2020-2021 Open University url: "https://www.open.ac.uk/" @@ -167,7 +153,6 @@ eurosciencegateway: joined: 2023-10 avatar: "/training-material/assets/images/eurosciencegateway.png" url: https://galaxyproject.org/projects/esg/ - funder: true funding_id: "101057388" funding_system: cordis funding_statement: | @@ -180,7 +165,6 @@ fairease: joined: 2023-10 avatar: "/training-material/assets/images/fair_ease_colour.png" url: https://fairease.eu/ - funder: true funding_id: "101058785" funding_system: cordis 
funding_statement: | @@ -192,7 +176,6 @@ fnso2019: joined: 2023-05 avatar: "/training-material/shared/images/fnso.png" github: false - funder: true url: "https://www.ouvrirlascience.fr/national-fund-for-open-science/" funder_name: National Fund for Open Science funding_id: AAPFNSO2019OpenMetaPaper-14026 @@ -209,7 +192,6 @@ gallantries: joined: 2020-09 avatar: "https://gallantries.github.io/assets/images/gallantries-logo.png" github: false - funder: true url: "https://www.erasmusplus.nl" funder_name: Erasmus+ Programme funding_id: 2020-1-NL01-KA203-064717 @@ -225,7 +207,6 @@ h2020-defend: short_name: DEFEND github: false joined: 2023-05 - funder: true funding_id: "773701" funding_system: cordis funding_statement: | @@ -239,7 +220,6 @@ madland: joined: 2024-07 url: https://madland.science avatar: "/training-material/shared/images/logo-madland.jpg" - funder: true github: false mwk: @@ -248,7 +228,6 @@ mwk: joined: 2024-08 url: https://mwk.baden-wuerttemberg.de/de/startseite avatar: "/training-material/assets/images/MWK.png" - funder: true github: false nhgri-anvil: @@ -258,7 +237,6 @@ nhgri-anvil: joined: 2023-06 avatar: https://www.ashg.org/wp-content/uploads/2021/07/nhgri-logo-800x167-1.png url: https://www.genome.gov/Funded-Programs-Projects/Computational-Genomics-and-Data-Science-Program/Genomic-Analysis-Visualization-Informatics-Lab-space-AnVIL - funder: true funding_id: U24HG010263 nhgri-gdscn: @@ -268,7 +246,6 @@ nhgri-gdscn: joined: 2023-06 avatar: https://www.ashg.org/wp-content/uploads/2021/07/nhgri-logo-800x167-1.png url: https://www.genome.gov/ - funder: true funding_id: 75N92022P00232 NIH: @@ -276,7 +253,6 @@ NIH: short_name: "NIH" avatar: /training-material/shared/images/nih.png url: https://nih.gov/ - funder: true github: false @@ -285,7 +261,6 @@ nsf: url: https://www.nsf.gov avatar: "/training-material/shared/images/nsf.gif" github: false - funder: true sfb992: @@ -293,7 +268,6 @@ sfb992: url: https://www.sfb992.uni-freiburg.de/ avatar: 
https://raw.githubusercontent.com/bgruening/presentations/bce348bb606c312d531c479e63a66efc2bc38d44/shared/resources/img/MEDEP.jpg github: false - funder: true ukmrc: @@ -301,6 +275,5 @@ ukmrc: url: https://mrc.ukri.org avatar: https://raw.githubusercontent.com/nomadscientist/scrnaseq_training/master/logo.png github: false - funder: true From bd0ee81d62bb747ec7976533d57d0b37e8bd8b57 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 12:38:57 +0200 Subject: [PATCH 04/41] relabel funders to grants in scripts for clarity --- .github/workflows/ci-main.yml | 2 +- bin/gtn.rb | 4 ++-- bin/news.rb | 10 +++++----- bin/validate-contributors.rb | 10 +++++----- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/ci-main.yml b/.github/workflows/ci-main.yml index 11d974a2c02da7..21993c3c2934a1 100644 --- a/.github/workflows/ci-main.yml +++ b/.github/workflows/ci-main.yml @@ -17,7 +17,7 @@ on: - 'news/**' - 'events/**' - CONTRIBUTORS.yaml - - FUNDERS.yaml + - GRANTS.yaml - ORGANISATIONS.yaml diff --git a/bin/gtn.rb b/bin/gtn.rb index 1de7f225e5e203..163650590ff914 100644 --- a/bin/gtn.rb +++ b/bin/gtn.rb @@ -2,7 +2,7 @@ CONTRIBUTORS = YAML.load_file('CONTRIBUTORS.yaml') ORGANISATIONS = YAML.load_file('ORGANISATIONS.yaml') -FUNDERS = YAML.load_file('FUNDERS.yaml') +GRANTS = YAML.load_file('GRANTS.yaml') def automagic_loading(f) # Remove our documentation @@ -18,7 +18,7 @@ def automagic_loading(f) repl = [] # If one of the elements in this array is CONTRIBUTORS, replace it with the same named variable repl << CONTRIBUTORS.keys if v.find { |x| x == 'CONTRIBUTORS' } - repl << FUNDERS.keys if v.find { |x| x == 'FUNDERS' } + repl << GRANTS.keys if v.find { |x| x == 'GRANTS' } repl << ORGANISATIONS.keys if v.find { |x| x == 'ORGANISATIONS' } v.replace repl.flatten if repl.length.positive? 
end diff --git a/bin/news.rb b/bin/news.rb index e8787c772108be..dfbdcc3a249dda 100755 --- a/bin/news.rb +++ b/bin/news.rb @@ -51,7 +51,7 @@ CONTRIBUTORS = safe_load_yaml('CONTRIBUTORS.yaml') ORGANISATIONS = safe_load_yaml('ORGANISATIONS.yaml') -FUNDERS = safe_load_yaml('FUNDERS.yaml') +GRANTS = safe_load_yaml('GRANTS.yaml') # new news # new slidevideos @@ -140,7 +140,7 @@ def isDraft(n) .split("\n").grep(/^\+[^ ]+:\s*$/).map { |x| x.strip[1..-2] }, organisations: `git diff --unified --ignore-all-space #{options[:previousCommit]} ORGANISATIONS.yaml` .split("\n").grep(/^\+[^ ]+:\s*$/).map { |x| x.strip[1..-2] }, - funders: `git diff --unified --ignore-all-space #{options[:previousCommit]} FUNDERS.yaml` + grants: `git diff --unified --ignore-all-space #{options[:previousCommit]} GRANTS.yaml` .split("\n").grep(/^\+[^ ]+:\s*$/).map { |x| x.strip[1..-2] }, } @@ -234,10 +234,10 @@ def build_news(data, filter: nil, updates: true, only_news: false) output += data[:organisations].map { |c| linkify("@#{c}", "hall-of-fame/#{c}") }.join("\n").gsub(/^/, '- ') end - if filter.nil? && data[:funders].length.positive? + if filter.nil? && data[:grants].length.positive? 
newsworthy = true - output += "\n\n## #{data[:funders].length} new funders!\n\n" - output += data[:funders].map { |c| linkify("@#{c}", "hall-of-fame/#{c}") }.join("\n").gsub(/^/, '- ') + output += "\n\n## #{data[:grants].length} new grants!\n\n" + output += data[:grants].map { |c| linkify("@#{c}", "hall-of-fame/#{c}") }.join("\n").gsub(/^/, '- ') end [output, newsworthy] diff --git a/bin/validate-contributors.rb b/bin/validate-contributors.rb index 17d8b0a89065e1..15f71c33746e4e 100755 --- a/bin/validate-contributors.rb +++ b/bin/validate-contributors.rb @@ -11,9 +11,9 @@ CONTRIBUTORS_SCHEMA = automagic_loading(CONTRIBUTORS_SCHEMA_UNSAFE) contribs_validator = Kwalify::Validator.new(CONTRIBUTORS_SCHEMA) -FUNDERS_SCHEMA_UNSAFE = YAML.load_file('bin/schema-funders.yaml') -FUNDERS_SCHEMA = automagic_loading(FUNDERS_SCHEMA_UNSAFE) -funders_validator = Kwalify::Validator.new(FUNDERS_SCHEMA) +GRANTS_SCHEMA_UNSAFE = YAML.load_file('bin/schema-grants.yaml') +GRANTS_SCHEMA = automagic_loading(GRANTS_SCHEMA_UNSAFE) +grants_validator = Kwalify::Validator.new(GRANTS_SCHEMA) ORGANISATIONS_SCHEMA_UNSAFE = YAML.load_file('bin/schema-organisations.yaml') ORGANISATIONS_SCHEMA = automagic_loading(ORGANISATIONS_SCHEMA_UNSAFE) @@ -43,8 +43,8 @@ def show_errors(file, errs) # This variable from bin/gtn.rb errs = validate_document(CONTRIBUTORS, contribs_validator) ec |= show_errors('CONTRIBUTORS.yaml', errs) -errs = validate_document(FUNDERS, funders_validator) -ec |= show_errors('FUNDERS.yaml', errs) +errs = validate_document(GRANTS, grants_validator) +ec |= show_errors('GRANTS.yaml', errs) errs = validate_document(ORGANISATIONS, organisations_validator) ec | show_errors('ORGANISATIONS.yaml', errs) From 8d68064e87f9049941c8a75cf028a06d7c679229 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 12:40:03 +0200 Subject: [PATCH 05/41] rename schema file --- bin/{schema-funders.yaml => schema-grants.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename 
bin/{schema-funders.yaml => schema-grants.yaml} (100%) diff --git a/bin/schema-funders.yaml b/bin/schema-grants.yaml similarity index 100% rename from bin/schema-funders.yaml rename to bin/schema-grants.yaml From 69eb536d69af6e1c78e1210ab623527b2ca4aead Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 13:10:22 +0200 Subject: [PATCH 06/41] rename funders to grants --- _includes/funding-statement.md | 4 ++-- _layouts/news.html | 2 +- bin/schema-contributors.yaml | 4 ++-- bin/schema-event-external.yaml | 4 ++-- bin/schema-event.yaml | 4 ++-- bin/schema-learning-pathway.yaml | 2 +- bin/schema-news.yaml | 8 ++++---- bin/schema-slides.yaml | 8 ++++---- bin/schema-tutorial.yaml | 8 ++++---- metadata/funders.yaml | 1 - metadata/grants.yaml | 1 + metadata/schema-funders.yaml | 1 - metadata/schema-grants.yaml | 1 + topics/contributing/tutorials/schemas/tutorial.md | 4 ++-- 14 files changed, 26 insertions(+), 26 deletions(-) delete mode 120000 metadata/funders.yaml create mode 120000 metadata/grants.yaml delete mode 120000 metadata/schema-funders.yaml create mode 120000 metadata/schema-grants.yaml diff --git a/_includes/funding-statement.md b/_includes/funding-statement.md index d2f72e4526ebce..47bef4df50ea78 100644 --- a/_includes/funding-statement.md +++ b/_includes/funding-statement.md @@ -3,7 +3,7 @@ {% for id in include.funders %} {% assign name = site.data.contributors[id].name | default: id -%} - {% assign pfo = site.data.funders[id] | default: site.data.organisations[id] | default: site.data.contributors[id] | default: nil %} + {% assign pfo = site.data.grants[id] | default: site.data.organisations[id] | default: site.data.contributors[id] | default: nil %}
{% if pfo.avatar %} Logo @@ -17,7 +17,7 @@
{{ pfo.short_name | default: pfo.name | default: id }}
- {{ site.data.funders[id].funding_statement | markdownify | strip_html }} + {{ site.data.grants[id].funding_statement | markdownify | strip_html }} {{ pfo.description }}
diff --git a/_layouts/news.html b/_layouts/news.html index 7008c0d1f06381..8b6e4b1cb4ef29 100644 --- a/_layouts/news.html +++ b/_layouts/news.html @@ -59,7 +59,7 @@

{{locale['references']| default: "References" }}

{% if page.contributions %} {% if page.contributions.funding %}

{{locale['references']| default: "Funding" }}

-

These individuals or organisations provided funding support for the development of this resource

+

These organisations or grants provided funding support for the development of this resource

{% include _includes/funding-statement.md funders=page.contributions.funding %} {% endif %} {% endif %} diff --git a/bin/schema-contributors.yaml b/bin/schema-contributors.yaml index 0ecf767339915e..9ea70e54cb1135 100644 --- a/bin/schema-contributors.yaml +++ b/bin/schema-contributors.yaml @@ -101,7 +101,7 @@ mapping: required: true enum: - ORGANISATIONS - - FUNDERS + - GRANTS former_affiliations: type: seq description: "A set of organisations you were previously affiliated with" @@ -110,7 +110,7 @@ mapping: required: true enum: - ORGANISATIONS - - FUNDERS + - GRANTS elixir_node: type: str enum: diff --git a/bin/schema-event-external.yaml b/bin/schema-event-external.yaml index 6320791ef1d464..973c5b02d6e0a0 100644 --- a/bin/schema-event-external.yaml +++ b/bin/schema-event-external.yaml @@ -58,7 +58,7 @@ mapping: enum: - CONTRIBUTORS - ORGANISATIONS - - FUNDERS + - GRANTS funding: type: seq description: These entities provided funding support for the development of this resource @@ -66,7 +66,7 @@ mapping: - type: str enum: - ORGANISATIONS - - FUNDERS + - GRANTS translation: type: seq description: These entities did translation and localisation work on this resource diff --git a/bin/schema-event.yaml b/bin/schema-event.yaml index 0e792fbd26a11c..98d3437a5e2076 100644 --- a/bin/schema-event.yaml +++ b/bin/schema-event.yaml @@ -53,7 +53,7 @@ mapping: enum: - CONTRIBUTORS - ORGANISATIONS - - FUNDERS + - GRANTS funding: type: seq description: These entities provided funding support for the development of this resource @@ -61,7 +61,7 @@ mapping: - type: str enum: - ORGANISATIONS - - FUNDERS + - GRANTS translation: type: seq description: These entities did translation and localisation work on this resource diff --git a/bin/schema-learning-pathway.yaml b/bin/schema-learning-pathway.yaml index 7852cf67dbd301..018cbdd2596ad1 100644 --- a/bin/schema-learning-pathway.yaml +++ b/bin/schema-learning-pathway.yaml @@ -54,7 +54,7 @@ mapping: - type: str enum: - ORGANISATIONS - - 
FUNDERS + - GRANTS tags: type: seq description: Any relevant tags that would help a user discover this LP diff --git a/bin/schema-news.yaml b/bin/schema-news.yaml index d11e68de938776..e7ed0c10945eb2 100644 --- a/bin/schema-news.yaml +++ b/bin/schema-news.yaml @@ -35,7 +35,7 @@ mapping: enum: - CONTRIBUTORS - ORGANISATIONS - - FUNDERS + - GRANTS description: | List of tutorial contributors. Please use `contributions` instead as it provides more detailed accounting of tutorial history. _examples: @@ -104,7 +104,7 @@ mapping: enum: - CONTRIBUTORS - ORGANISATIONS - - FUNDERS + - GRANTS funding: type: seq description: These entities provided funding support for the development of this resource @@ -112,7 +112,7 @@ mapping: - type: str enum: - ORGANISATIONS - - FUNDERS + - GRANTS translation: type: seq description: These entities did translation and localisation work on this resource @@ -129,7 +129,7 @@ mapping: enum: - CONTRIBUTORS - ORGANISATIONS - - FUNDERS + - GRANTS cover: type: str description: | diff --git a/bin/schema-slides.yaml b/bin/schema-slides.yaml index dcde51fb887e9e..13249ef2bbac0e 100644 --- a/bin/schema-slides.yaml +++ b/bin/schema-slides.yaml @@ -83,7 +83,7 @@ mapping: required: true enum: - CONTRIBUTORS - - FUNDERS + - GRANTS - ORGANISATIONS description: | List of tutorial contributors. Please use `contributions` instead as it provides more detailed accounting of tutorial history. 
@@ -153,7 +153,7 @@ mapping: enum: - CONTRIBUTORS - ORGANISATIONS - - FUNDERS + - GRANTS funding: type: seq description: These entities provided funding support for the development of this resource @@ -161,7 +161,7 @@ mapping: - type: str enum: - ORGANISATIONS - - FUNDERS + - GRANTS translation: type: seq description: These entities did translation and localisation work on this resource @@ -178,7 +178,7 @@ mapping: enum: - CONTRIBUTORS - ORGANISATIONS - - FUNDERS + - GRANTS hands_on: type: str enum: diff --git a/bin/schema-tutorial.yaml b/bin/schema-tutorial.yaml index a0340c245f87a5..2c5d8547b85e64 100644 --- a/bin/schema-tutorial.yaml +++ b/bin/schema-tutorial.yaml @@ -83,7 +83,7 @@ mapping: required: true enum: - CONTRIBUTORS - - FUNDERS + - GRANTS - ORGANISATIONS description: | List of tutorial contributors. Please use `contributions` instead as it provides more detailed accounting of tutorial history. @@ -153,7 +153,7 @@ mapping: enum: - CONTRIBUTORS - ORGANISATIONS - - FUNDERS + - GRANTS funding: type: seq description: These entities provided funding support for the development of this resource @@ -161,7 +161,7 @@ mapping: - type: str enum: - ORGANISATIONS - - FUNDERS + - GRANTS translation: type: seq description: These entities did translation and localisation work on this resource @@ -178,7 +178,7 @@ mapping: enum: - CONTRIBUTORS - ORGANISATIONS - - FUNDERS + - GRANTS subtopic: type: str description: | diff --git a/metadata/funders.yaml b/metadata/funders.yaml deleted file mode 120000 index d0859efd5ab683..00000000000000 --- a/metadata/funders.yaml +++ /dev/null @@ -1 +0,0 @@ -../FUNDERS.yaml \ No newline at end of file diff --git a/metadata/grants.yaml b/metadata/grants.yaml new file mode 120000 index 00000000000000..3b511b7b8dfd63 --- /dev/null +++ b/metadata/grants.yaml @@ -0,0 +1 @@ +../GRANTS.yaml \ No newline at end of file diff --git a/metadata/schema-funders.yaml b/metadata/schema-funders.yaml deleted file mode 120000 index 
31f74974b577e9..00000000000000 --- a/metadata/schema-funders.yaml +++ /dev/null @@ -1 +0,0 @@ -../bin/schema-funders.yaml \ No newline at end of file diff --git a/metadata/schema-grants.yaml b/metadata/schema-grants.yaml new file mode 120000 index 00000000000000..56c55875577c75 --- /dev/null +++ b/metadata/schema-grants.yaml @@ -0,0 +1 @@ +../bin/schema-grants.yaml \ No newline at end of file diff --git a/topics/contributing/tutorials/schemas/tutorial.md b/topics/contributing/tutorials/schemas/tutorial.md index 2990641537e206..0f365685665f17 100644 --- a/topics/contributing/tutorials/schemas/tutorial.md +++ b/topics/contributing/tutorials/schemas/tutorial.md @@ -60,6 +60,6 @@ contributors: {% assign kid_val = site.data['schema-organisations'] %} {% include _includes/schema-render.html key=kid_key value=kid_val %} -{% assign kid_key = "Funder Schema" %} -{% assign kid_val = site.data['schema-funders'] %} +{% assign kid_key = "Grant Schema" %} +{% assign kid_val = site.data['schema-grants'] %} {% include _includes/schema-render.html key=kid_key value=kid_val %} From 4396760858639c923a7150f160d4a214dc260b70 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 14:47:57 +0200 Subject: [PATCH 07/41] reword explanation --- ORGANISATIONS.yaml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ORGANISATIONS.yaml b/ORGANISATIONS.yaml index 102d6b324eb7e7..8e5a74bdcc2f19 100644 --- a/ORGANISATIONS.yaml +++ b/ORGANISATIONS.yaml @@ -1,13 +1,14 @@ --- -# List of contributing organisations +# List of contributing organisations and projects # -# reference maintainers/contributors by their id in tutorial metadata files +# Organisations can be listed as affiliations for contributors +# Organisations can be listed as funding contributors on training materials # # valid tags: name, email, linkedin, twitter, gitter, orcid, bio, joined # -# collection names should be equal to github username, if not, add github: false tag - -# our real 
contributing organisations <3 (please add them in alphabetical order) +# ID should be equal to github username if one exists, otherwise add 'github: false' +# +# Please add organisations in alphabetical order AustralianBioCommons: name: Australian BioCommons From dca55482c045afe9e57e7dcbd13b7217cced3498 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 14:48:24 +0200 Subject: [PATCH 08/41] more renaming of funders to grants --- _plugins/gtn.rb | 18 +++++++++--------- _plugins/gtn/contributors.rb | 12 ++++++------ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/_plugins/gtn.rb b/_plugins/gtn.rb index 826c02e6cf58ec..945a42d34b3691 100644 --- a/_plugins/gtn.rb +++ b/_plugins/gtn.rb @@ -847,7 +847,7 @@ def find_learningpaths_including_topic(site, topic_id) ## # We're going to do some find and replace, to replace `@gtn:contributorName` with a link to their profile. Jekyll::Hooks.register :site, :pre_render do |site| - pfo_keys = site.data['contributors'].keys + site.data['funders'].keys + site.data['organisations'].keys + pfo_keys = site.data['contributors'].keys + site.data['grants'].keys + site.data['organisations'].keys site.posts.docs.each do |post| if post.content post.content = post.content.gsub(/@gtn:([a-zA-Z0-9_-]+)/) do |match| @@ -899,10 +899,10 @@ def find_learningpaths_including_topic(site, topic_id) end site.data['organisations'][affiliation]['members'] << name - elsif site.data['funders'].key?(affiliation) - site.data['funders'][affiliation]['members'] = [] if !site.data['funders'][affiliation].key?('members') + elsif site.data['grants'].key?(affiliation) + site.data['grants'][affiliation]['members'] = [] if !site.data['grants'][affiliation].key?('members') - site.data['funders'][affiliation]['members'] << name + site.data['grants'][affiliation]['members'] << name end end end @@ -915,12 +915,12 @@ def find_learningpaths_including_topic(site, topic_id) end site.data['organisations'][affiliation]['former_members'] << name 
- elsif site.data['funders'].key?(affiliation) - if !site.data['funders'][affiliation].key?('former_members') - site.data['funders'][affiliation]['former_members'] = [] + elsif site.data['grants'].key?(affiliation) + if !site.data['grants'][affiliation].key?('former_members') + site.data['grants'][affiliation]['former_members'] = [] end - site.data['funders'][affiliation]['former_members'] << name + site.data['grants'][affiliation]['former_members'] << name end end end @@ -947,7 +947,7 @@ def find_learningpaths_including_topic(site, topic_id) # Annotate symlinks site.pages.each do |page| - page.data['symlink'] = File.symlink?(page.path) + page.data['symlink'] = File.symlink?(page.path) # Elsewhere we checked more levels deep, maybe enable if needed. # || File.symlink?(File.dirname(page.path)) || File.symlink?(File.dirname(File.dirname(page.path))) end diff --git a/_plugins/gtn/contributors.rb b/_plugins/gtn/contributors.rb index acc232ae5fd5bb..5d718fdf2d907b 100644 --- a/_plugins/gtn/contributors.rb +++ b/_plugins/gtn/contributors.rb @@ -94,7 +94,7 @@ def self.get_non_authors(material) def self.get_funders(site, data) if data.key?('contributions') && data['contributions'].key?('funding') # The ones specifically in the Grants table - data['contributions']['funding'].reject{ |f| site.data['funders'].key?(f) } + data['contributions']['funding'].reject{ |f| site.data['grants'].key?(f) } else [] end @@ -110,7 +110,7 @@ def self.get_funders(site, data) def self.get_grants(site, data) if data.key?('contributions') && data['contributions'].key?('funding') # The ones specifically in the Grants table - data['contributions']['funding'].select{ |f| site.data['funders'].key?(f) } + data['contributions']['funding'].select{ |f| site.data['grants'].key?(f) } else [] end @@ -140,8 +140,8 @@ def self.fetch(site, c, warn: false) return ['contributor', site.data['contributors'][c]] elsif _load_file(site, 'organisations').key?(c) return ['organisation', site.data['organisations'][c]] 
- elsif _load_file(site, 'funders').key?(c) - return ['funder', site.data['funders'][c]] + elsif _load_file(site, 'grants').key?(c) + return ['funder', site.data['grants'][c]] else if ! warn Jekyll.logger.warn "Contributor #{c} not found" @@ -181,7 +181,7 @@ def self.fetch_name(site, c, warn: false) # +Hash+ of contributors, funders, organisations merged together def self.list(site) site.data['contributors'] - .merge(site.data['funders']) + .merge(site.data['grants']) .merge(site.data['organisations']) .reject { |c| c['halloffame'] == 'no' } end @@ -203,7 +203,7 @@ def self.person?(site, c) # Returns: # +Boolean+ of whether the contributor is a funder or not def self.funder?(site, c) - site.data['funders'].key?(c) + site.data['grants'].key?(c) end ## From 0fe6af8cea2601f02e806974d50700ffdf9a01de Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 15:12:34 +0200 Subject: [PATCH 09/41] move anything without funding_id to organisations --- GRANTS.yaml | 57 +++++------------------------------------ ORGANISATIONS.yaml | 64 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 50 deletions(-) diff --git a/GRANTS.yaml b/GRANTS.yaml index 697ebcb1a21426..bedd698f865d75 100644 --- a/GRANTS.yaml +++ b/GRANTS.yaml @@ -12,18 +12,13 @@ # joined: 2020-06 # url: "https://example.com" # homepage of the project # funding_id: 2020-1-NL01-KA203-064717 -# funding_system: erasmusplus # database/page where your +# funding_system: cordis|ukri|erasmusplus # database/page where your project/grant is listed, if your project is not present in one of these portals, please ping us # funding_statement: "This project is funded with the support of .." 
# github: false # add this if the id does not map to a GitHub account, # please add in alphabetical order of the key (id) -abromics: - name: ABRomics - github: false - joined: 2024-01 - avatar: "/training-material/assets/images/abromics.png" - url: https://www.abromics.fr/ + ai4life: name: AI4Life @@ -68,15 +63,6 @@ CINECA-Project: funder_name: Horizon 2020 url: https://www.cineca-project.eu -nfdi4plants: - name: DataPLANT - joined: 2024-07 - url: https://www.nfdi4plants.de://www.nfdi4plants.de/ - avatar: "/training-material/shared/images/logo-dataplant.svg" - -deNBI: - name: de.NBI - url: https://www.denbi.de/ elixir-converge: name: ELIXIR-CONVERGE @@ -96,14 +82,11 @@ edctp-eu: avatar: "/training-material/shared/images/edctp-eu.png" github: false url: https://www.edctp.org/ + funding_id: "101103640" + funding_system: cordis funding_statement: | TB-CAPT, PanACEA and PANGenS are part of the EDCTP2 and EDCTP3 programs supported by the European Union. -elixir-excelerate: - name: Elixir Excelerate - url: https://www.elixir-europe.org/excelerate/ - avatar: "/training-material/shared/images/Excelerate_whitebackground.png" - github: false elixir-fair-data: name: "ELIXIR-UK: FAIR Data Stewardship training" @@ -144,9 +127,12 @@ epsrc-training-grant: short_name: "EPSRC/OU" github: false joined: 2022-09 + funding_id: "EP/T518165/1" + funding_system: ukri funding_statement: The research internship was supported through EPSRC Training Grant DTP 2020-2021 Open University url: "https://www.open.ac.uk/" + eurosciencegateway: name: EuroScienceGateway github: false @@ -215,21 +201,6 @@ h2020-defend: This work has received funding from the DEFEND project (www.defend2020.eu) with funding from the European Union's Horizon 2020 research and innovation programme under grant agreement No 773701. 
url: https://www.defend2020.eu -madland: - name: MAdLand - joined: 2024-07 - url: https://madland.science - avatar: "/training-material/shared/images/logo-madland.jpg" - github: false - -mwk: - name: Ministerium für Wissenschaft, Forschung und Kunst Baden-Württemberg - short_name: MWK - joined: 2024-08 - url: https://mwk.baden-wuerttemberg.de/de/startseite - avatar: "/training-material/assets/images/MWK.png" - github: false - nhgri-anvil: name: National Human Genome Research Institute Genomic Data Science Analysis, Visualization, and Informatics Lab-Space short_name: "NHGRI ANVIL" @@ -248,20 +219,6 @@ nhgri-gdscn: url: https://www.genome.gov/ funding_id: 75N92022P00232 -NIH: - name: National Institutes of Health - short_name: "NIH" - avatar: /training-material/shared/images/nih.png - url: https://nih.gov/ - github: false - - -nsf: - name: National Science Foundation - url: https://www.nsf.gov - avatar: "/training-material/shared/images/nsf.gif" - github: false - sfb992: name: Collaborative Research Centre 992 diff --git a/ORGANISATIONS.yaml b/ORGANISATIONS.yaml index 8e5a74bdcc2f19..ee735e2f9f637e 100644 --- a/ORGANISATIONS.yaml +++ b/ORGANISATIONS.yaml @@ -10,6 +10,13 @@ # # Please add organisations in alphabetical order +abromics: + name: ABRomics + github: false + joined: 2024-01 + avatar: "/training-material/assets/images/abromics.png" + url: https://www.abromics.fr/ + AustralianBioCommons: name: Australian BioCommons url: https://www.biocommons.org.au/ @@ -27,6 +34,10 @@ carpentries: joined: 2021-09 ror: "0356fgm10" +deNBI: + name: de.NBI + url: https://www.denbi.de/ + earlham: name: Earlham Institute joined: 2017-09 @@ -49,6 +60,13 @@ egi: github: false ror: "052jj4m32" + +elixir-excelerate: + name: Elixir Excelerate + url: https://www.elixir-europe.org/excelerate/ + avatar: "/training-material/shared/images/Excelerate_whitebackground.png" + github: false + elixir-europe: name: ELIXIR Europe joined: 2017-09 @@ -133,6 +151,13 @@ linq: avatar: 
"/training-material/shared/images/linq.jpg" github: false +madland: + name: MAdLand + joined: 2024-07 + url: https://madland.science + avatar: "/training-material/shared/images/logo-madland.jpg" + github: false + minnesotauni: name: University of Minnesota url: "https://twin-cities.umn.edu/" @@ -147,12 +172,39 @@ MPIIE: github: false ror: "058xzat49" +mwk: + name: Ministerium für Wissenschaft, Forschung und Kunst Baden-Württemberg + short_name: MWK + joined: 2024-08 + url: https://mwk.baden-wuerttemberg.de/de/startseite + avatar: "/training-material/assets/images/MWK.png" + github: false + ncbi: name: National Center for Biotechnology Information url: "https://www.ncbi.nlm.nih.gov/" avatar: "/training-material/shared/images/ncbi.png" ror: "02meqm098" +nfdi4plants: + name: DataPLANT + joined: 2024-07 + url: https://www.nfdi4plants.de://www.nfdi4plants.de/ + avatar: "/training-material/shared/images/logo-dataplant.svg" + +NIH: + name: National Institutes of Health + short_name: "NIH" + avatar: /training-material/shared/images/nih.png + url: https://nih.gov/ + github: false + +nsf: + name: National Science Foundation + url: https://www.nsf.gov + avatar: "/training-material/shared/images/nsf.gif" + github: false + panacea: name: PanACEA description: Pan-African Consortium for the Evaluation of Antituberculosis Antibiotics @@ -214,6 +266,12 @@ sciensano: github: false ror: "04ejags36" +sfb992: + name: Collaborative Research Centre 992 + url: https://www.sfb992.uni-freiburg.de/ + avatar: https://raw.githubusercontent.com/bgruening/presentations/bce348bb606c312d531c479e63a66efc2bc38d44/shared/resources/img/MEDEP.jpg + github: false + societyprotectionundergroundnetworks: name: Society for the Protection of Underground Networks description: "SPUN is a scientific research organization founded to map mycorrhizal fungal communities and advocate for their protection." 
@@ -240,6 +298,12 @@ uga: github: false ror: "02rx3b187" +ukmrc: + name: UK Medical Research Council + url: https://mrc.ukri.org + avatar: https://raw.githubusercontent.com/nomadscientist/scrnaseq_training/master/logo.png + github: false + uni-freiburg: name: University of Freiburg url: https://www.uni-freiburg.de/ From 1cc1abb2e856b6560e07b204a09a3c0b36ef0ebd Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 15:15:50 +0200 Subject: [PATCH 10/41] require funding_id for grants --- bin/schema-grants.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/schema-grants.yaml b/bin/schema-grants.yaml index 3f01d1ca52142d..2be9d7a46f5f85 100644 --- a/bin/schema-grants.yaml +++ b/bin/schema-grants.yaml @@ -101,6 +101,7 @@ mapping: - true funding_id: type: str + required: true description: The short identifier for your grant. _examples: - 2020-1-NL01-KA203-064717 From 82b4cb0223edaf5225b26ad6337b40e0b6c54016 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 15:16:51 +0200 Subject: [PATCH 11/41] support highergov for grant database --- _plugins/gtn/contributors.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/_plugins/gtn/contributors.rb b/_plugins/gtn/contributors.rb index 5d718fdf2d907b..87991075236886 100644 --- a/_plugins/gtn/contributors.rb +++ b/_plugins/gtn/contributors.rb @@ -222,6 +222,8 @@ def self.fetch_funding_url(contributor) "https://erasmus-plus.ec.europa.eu/projects/search/details/#{contributor['funding_id']}" when 'ukri' "https://gtr.ukri.org/projects?ref=#{contributor['funding_id']}" + when 'highergov' + "https://www.highergov.com/contract/#{contributor['funding_id']}/" else Jekyll.logger.error "Unknown funding system #{contributor['funding_system']}" 'ERROR' From a930dfbdbd3b6d540e07c798082bcd15fc594bf3 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 15:20:35 +0200 Subject: [PATCH 12/41] rename funding_system to funding_database --- GRANTS.yaml | 43 +++++++++++++-------------------- 
_layouts/contributor_index.html | 2 +- _plugins/gtn/contributors.rb | 6 ++--- bin/schema-grants.yaml | 3 ++- 4 files changed, 23 insertions(+), 31 deletions(-) diff --git a/GRANTS.yaml b/GRANTS.yaml index bedd698f865d75..7aa3b60046bee0 100644 --- a/GRANTS.yaml +++ b/GRANTS.yaml @@ -3,7 +3,7 @@ # Can be added to contributions->funding metadata key of materials # # valid tags: -# name, joined, url, github, funding_id, funding_system, funding_statement +# name, joined, url, github, funding_id, funding_database, funding_statement # --- @@ -12,7 +12,7 @@ # joined: 2020-06 # url: "https://example.com" # homepage of the project # funding_id: 2020-1-NL01-KA203-064717 -# funding_system: cordis|ukri|erasmusplus # database/page where your project/grant is listed, if your project is not present in one of these portals, please ping us +# funding_database: cordis|highergov|ukri|erasmusplus # database where your project/grant is listed, if your project is not present in one of these portals, please ping us. EU grants are often listed in cordis, US grants in highergov # funding_statement: "This project is funded with the support of .." # github: false # add this if the id does not map to a GitHub account, @@ -27,7 +27,7 @@ ai4life: avatar: "/training-material/topics/ai4life/images/AI4Life-logo_giraffe-solid.png" url: https://ai4life.eurobioimaging.eu/ funding_id: "101057970" - funding_system: cordis + funding_database: cordis funding_statement: | AI4Life has received funding from the European Union’s Horizon Europe research and innovation programme under grant agreement number 101057970. 
@@ -38,7 +38,7 @@ biont: avatar: "/training-material/assets/images/BioNT_Logo.png" url: https://biont-training.eu/ funding_id: "101100604" - funding_system: cordis + funding_database: cordis funding_statement: | Co-funded by the European Union @@ -46,7 +46,7 @@ by-covid: name: BeYond-COVID joined: 2023-04 funding_id: "101046203" - funding_system: cordis + funding_database: cordis funding_statement: | BY-COVID is an EC funded project that tackles the data challenges that can hinder effective pandemic response. @@ -59,7 +59,7 @@ CINECA-Project: short_name: "CINECA" joined: 2023-03 funding_id: "825775" - funding_system: cordis + funding_database: cordis funder_name: Horizon 2020 url: https://www.cineca-project.eu @@ -71,7 +71,7 @@ elixir-converge: avatar: "https://elixir-europe.org/sites/default/files/styles/right-medium/public/images/converge_logo.png" url: https://elixir-europe.org/about-us/how-funded/eu-projects/converge funding_id: "871075" - funding_system: cordis + funding_database: cordis funding_statement: | ELIXIR CONVERGE is connecting and align ELIXIR Nodes to deliver sustainable FAIR life-science data management services. This project has received funding from the European Union's Horizon 2020 research and innovation programme under grant agreement № 871075 @@ -83,7 +83,7 @@ edctp-eu: github: false url: https://www.edctp.org/ funding_id: "101103640" - funding_system: cordis + funding_database: cordis funding_statement: | TB-CAPT, PanACEA and PANGenS are part of the EDCTP2 and EDCTP3 programs supported by the European Union. 
@@ -94,7 +94,7 @@ elixir-fair-data: github: false joined: 2023-06 funding_id: MR/V038966/1 - funding_system: ukri + funding_database: ukri funding_statement: | This work has been funded by the ELIXIR-UK FAIR Data Stewardship training UKRI award (MR/V038966/1) @@ -105,7 +105,7 @@ elixir-uk-dash: github: false avatar: "/training-material/shared/images/dash.png" url: https://elixiruknode.org/projects/elixir-uk-dash/ - funding_system: ukri + funding_database: ukri funding_id: MR/V038966/1 funding_statement: | This Fellowship was funded through the ELIXIR-UK DaSH project as part of the UKRI Innovation Scholars: Data Science Training in Health and Bioscience call (DaSH). (MR/V038966/1). The project aims to embed Research Data Management (RDM) know-how into UK universities and institutes by producing and delivering training in FAIR data stewardship using ELIXIR-UK knowledge and resources. @@ -116,7 +116,7 @@ eosc-life: joined: 2023-04 avatar: "https://www.eosc-life.eu/wp-content/themes/eosc-life-v2/assets/images/eosclogo.png" funding_id: "824087" - funding_system: cordis + funding_database: cordis funding_statement: | EOSC-Life has received funding from the European Union’s Horizon 2020 programme under grant agreement number 824087 url: https://www.eosc-life.eu @@ -128,7 +128,7 @@ epsrc-training-grant: github: false joined: 2022-09 funding_id: "EP/T518165/1" - funding_system: ukri + funding_database: ukri funding_statement: The research internship was supported through EPSRC Training Grant DTP 2020-2021 Open University url: "https://www.open.ac.uk/" @@ -140,7 +140,7 @@ eurosciencegateway: avatar: "/training-material/assets/images/eurosciencegateway.png" url: https://galaxyproject.org/projects/esg/ funding_id: "101057388" - funding_system: cordis + funding_database: cordis funding_statement: | EuroScienceGateway was funded by the European Union programme Horizon Europe (HORIZON-INFRA-2021-EOSC-01-04) under grant agreement number 101057388 and by UK Research and Innovation 
(UKRI) under the UK government’s Horizon Europe funding guarantee grant number 10038963. @@ -152,7 +152,7 @@ fairease: avatar: "/training-material/assets/images/fair_ease_colour.png" url: https://fairease.eu/ funding_id: "101058785" - funding_system: cordis + funding_database: cordis funding_statement: | FAIR-EASE is a RIA project funded under HORIZON-INFRA-2021-EOSC-01-04, and it involves a consortium of 25 partners from all over Europe. @@ -181,7 +181,7 @@ gallantries: url: "https://www.erasmusplus.nl" funder_name: Erasmus+ Programme funding_id: 2020-1-NL01-KA203-064717 - funding_system: erasmusplus + funding_database: erasmusplus funding_statement: | This project ([`2020-1-NL01-KA203-064717`](https://erasmus-plus.ec.europa.eu/projects/search/details/2020-1-NL01-KA203-064717)) is funded with the support of the Erasmus+ programme of the European Union. Their funding has supported a large number of tutorials within the GTN across a wide array of topics. ![eu flag with the text: with the support of the erasmus programme of the european union](https://gallantries.github.io/assets/images/logosbeneficaireserasmusright_en.jpg) @@ -194,7 +194,7 @@ h2020-defend: github: false joined: 2023-05 funding_id: "773701" - funding_system: cordis + funding_database: cordis funding_statement: | DEFEND is Addressing the dual emerging threats of African Swine Fever and Lumpy Skin Disease in Europe. 
@@ -218,19 +218,10 @@ nhgri-gdscn: avatar: https://www.ashg.org/wp-content/uploads/2021/07/nhgri-logo-800x167-1.png url: https://www.genome.gov/ funding_id: 75N92022P00232 + funding_database: highergov -sfb992: - name: Collaborative Research Centre 992 - url: https://www.sfb992.uni-freiburg.de/ - avatar: https://raw.githubusercontent.com/bgruening/presentations/bce348bb606c312d531c479e63a66efc2bc38d44/shared/resources/img/MEDEP.jpg - github: false -ukmrc: - name: UK Medical Research Council - url: https://mrc.ukri.org - avatar: https://raw.githubusercontent.com/nomadscientist/scrnaseq_training/master/logo.png - github: false diff --git a/_layouts/contributor_index.html b/_layouts/contributor_index.html index 38a24a80a4542b..14dba6728c5f7d 100644 --- a/_layouts/contributor_index.html +++ b/_layouts/contributor_index.html @@ -273,7 +273,7 @@

External Links

{% if entity.funding_id %} Grant ID: - {% if entity.funding_system %} + {% if entity.funding_database %} {% assign url = entity | fetch_funding_url %}
{{ entity.funding_id }} diff --git a/_plugins/gtn/contributors.rb b/_plugins/gtn/contributors.rb index 87991075236886..4449fc2c5911d6 100644 --- a/_plugins/gtn/contributors.rb +++ b/_plugins/gtn/contributors.rb @@ -213,9 +213,9 @@ def self.funder?(site, c) # Returns: # +Boolean+ of whether the contributor is a funder or not def self.fetch_funding_url(contributor) - return contributor['funding_id'] if !contributor.key?('funding_system') + return contributor['funding_id'] if !contributor.key?('funding_database') - case contributor['funding_system'] + case contributor['funding_database'] when 'cordis' "https://cordis.europa.eu/project/id/#{contributor['funding_id']}" when 'erasmusplus' @@ -225,7 +225,7 @@ def self.fetch_funding_url(contributor) when 'highergov' "https://www.highergov.com/contract/#{contributor['funding_id']}/" else - Jekyll.logger.error "Unknown funding system #{contributor['funding_system']}" + Jekyll.logger.error "Unknown funding system #{contributor['funding_database']}" 'ERROR' end end diff --git a/bin/schema-grants.yaml b/bin/schema-grants.yaml index 2be9d7a46f5f85..3d5a2a37e928f5 100644 --- a/bin/schema-grants.yaml +++ b/bin/schema-grants.yaml @@ -110,13 +110,14 @@ mapping: description: associated webpage (NOTE, funders only!) _examples: - "https://elixir-europe.org" - funding_system: + funding_database: type: str description: Automatically link to the grant's information in the appropriate funding system site. enum: - cordis - erasmusplus - ukri + - highergov funder_name: type: str description: A name for the agency providing the funding. 
From b595d77de0aa6b5ec131ee1edac4d763704bd0ae Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 15:29:08 +0200 Subject: [PATCH 13/41] update description --- GRANTS.yaml | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/GRANTS.yaml b/GRANTS.yaml index 7aa3b60046bee0..4d1d3eb67a5ec4 100644 --- a/GRANTS.yaml +++ b/GRANTS.yaml @@ -1,23 +1,26 @@ -# List of grants funding GTN activity +--- +# List of grants funding GTN contributions # -# Can be added to contributions->funding metadata key of materials +# Grants can be listed as affiliations for contributors +# Grants can be listed as funding contributors on training materials # # valid tags: # name, joined, url, github, funding_id, funding_database, funding_statement # ---- - +# EXAMPLE +# # Grant1: # should be equal to GitHub username if one exists for the grant/project # name: Grant/Project name +# github: false # add this if the id does not map to a GitHub account # joined: 2020-06 -# url: "https://example.com" # homepage of the project -# funding_id: 2020-1-NL01-KA203-064717 -# funding_database: cordis|highergov|ukri|erasmusplus # database where your project/grant is listed, if your project is not present in one of these portals, please ping us. EU grants are often listed in cordis, US grants in highergov -# funding_statement: "This project is funded with the support of .." -# github: false # add this if the id does not map to a GitHub account, - -# please add in alphabetical order of the key (id) - +# url: "https://example.com" # homepage of the project +# funding_statement: "This project is funded with the support of .." 
# optional, if you want a specific bit of text to appear +# funding_id: 2020-1-NL01-KA203-064717 # id of your grant, will be used in combination with funding_database to link to offical grant page +# funding_database: cordis|highergov|ukri|erasmusplus # Database where your project/grant is listed, +# # EU grants are often listed in cordis, US grants in highergov, UK grants in ukri +# # If your project is not present in one of these databases, please ping us. +# +# Please add in alphabetical order of the key (id) ai4life: From 7a1d243e1a2d9fa7717e9b880f7b81a49ac87e8d Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 16:04:43 +0200 Subject: [PATCH 14/41] add a bunch of short names --- ORGANISATIONS.yaml | 65 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/ORGANISATIONS.yaml b/ORGANISATIONS.yaml index ee735e2f9f637e..a177399511dfde 100644 --- a/ORGANISATIONS.yaml +++ b/ORGANISATIONS.yaml @@ -23,6 +23,7 @@ AustralianBioCommons: avatar: "/training-material/shared/images/Australian-Biocommons-Favicon-RGB.png" avans-atgm: + short_name: Avans name: Avans Hogeschool joined: 2020-11 url: https://avans.nl @@ -46,8 +47,8 @@ earlham: ror: "018cxtf62" edctp: - name: EDCTP - description: The European & Developing Countries Clinical Trials Partnership + short_name: EDCTP + name: The European & Developing Countries Clinical Trials Partnership url: https://www.edctp.org/ avatar: "/training-material/shared/images/edctp.jpg" github: false @@ -62,7 +63,7 @@ egi: elixir-excelerate: - name: Elixir Excelerate + name: ELIXIR Excelerate url: https://www.elixir-europe.org/excelerate/ avatar: "/training-material/shared/images/Excelerate_whitebackground.png" github: false @@ -81,22 +82,23 @@ elixir-goblet-ttt: avatar: "/training-material/shared/images/elixir.png" embl-ebi: - name: EMBL-EBI - description: European Bioinformatics Institute + short_name: EMBL-EBI + name: European Bioinformatics Institute url: 
https://www.ebi.ac.uk avatar: https://raw.githubusercontent.com/nomadscientist/scrnaseq_training/d0fccaa9dc8dc8615eb7146d6c5e96bd36f11f3a/EMBL_EBI_Logo_black.svg github: false ror: "02catss52" erasmusmc: - name: ErasmusMC - description: Erasmus Medical Center + short_name: ErasmusMC + name: Erasmus Medical Center url: https://www.erasmusmc.nl avatar: "/training-material/shared/images/logo-erasmusmc.png" github: false ror: "018906e22" eu: + short_name: EU name: The European Union url: https://european-union.europa.eu avatar: "/training-material/shared/images/EU-logo.jpg" @@ -104,14 +106,15 @@ eu: ror: "019w4f821" find: - name: FIND - description: The Foundation for Innovative New Diagnostics + short_name: FIND + name: The Foundation for Innovative New Diagnostics url: https://www.finddx.org/ avatar: "/training-material/shared/images/FIND.png" github: false ror: "05tcsqz68" ifb: + short_name: IFB name: Institut Français de Bioinformatique url: https://www.france-bioinformatique.fr/ avatar: "/training-material/shared/images/ifb.png" @@ -125,6 +128,7 @@ inab-certh: ror: "03bndpq63" irccs: + short_name: IRCCS name: IRCCS Ospedale San Raffaele url: "https://www.hsr.it/" avatar: "/training-material/shared/images/irccs.jpg" @@ -138,6 +142,7 @@ jetstream2: github: false johnshopkins: + short_name: JHU name: Johns Hopkins University url: "https://www.jhu.edu/" avatar: "/training-material/shared/images/hopkins.png" @@ -145,20 +150,22 @@ johnshopkins: ror: "00za53h95" linq: - name: LINQ - description: LINQ management GmbH - link: "https://linq-management.com/" + short_name: LINQ + name: LINQ management GmbH + url: "https://linq-management.com/" avatar: "/training-material/shared/images/linq.jpg" github: false madland: - name: MAdLand + short_name: MAdLand + name: "MAdLand - Molecular Adaptation to Land: plant evolution to change" joined: 2024-07 url: https://madland.science avatar: "/training-material/shared/images/logo-madland.jpg" github: false minnesotauni: + short_name: 
UMN name: University of Minnesota url: "https://twin-cities.umn.edu/" avatar: "/training-material/shared/images/minnesotauni.png" @@ -166,6 +173,7 @@ minnesotauni: ror: "017zqws13" MPIIE: + short_name: MPIEE name: Max Planck Institute of Immunology and Epigenetics url: https://www.ie-freiburg.mpg.de avatar: https://raw.githubusercontent.com/bgruening/presentations/master/shared/resources/img/14_MPI_IE_logo_mit_180.gif @@ -181,13 +189,15 @@ mwk: github: false ncbi: + short_name: NCBI name: National Center for Biotechnology Information url: "https://www.ncbi.nlm.nih.gov/" avatar: "/training-material/shared/images/ncbi.png" ror: "02meqm098" nfdi4plants: - name: DataPLANT + short_name: DataPLANT + name: NFDI4Plants/DataPLANT joined: 2024-07 url: https://www.nfdi4plants.de://www.nfdi4plants.de/ avatar: "/training-material/shared/images/logo-dataplant.svg" @@ -200,38 +210,42 @@ NIH: github: false nsf: + short_name: NSF name: National Science Foundation url: https://www.nsf.gov avatar: "/training-material/shared/images/nsf.gif" github: false panacea: - name: PanACEA - description: Pan-African Consortium for the Evaluation of Antituberculosis Antibiotics + short_name: PanACEA + name: Pan-African Consortium for the Evaluation of Antituberculosis Antibiotics url: https://panacea-tb.net/ avatar: "/training-material/shared/images/panacea.jpg" github: false pangens: - name: PANGenS - description: Pan-Africa network for genomic surveillance of poverty related diseases and emerging pathogens + short_name: PANGenS + name: Pan-Africa network for genomic surveillance of poverty related diseases and emerging pathogens url: https://pangens.org/ avatar: "/training-material/shared/images/pangens.jpg" github: false petermac: + short_name: PeterMac name: Peter MacCallum Cancer Centre url: "https://www.petermac.org/" avatar: "/training-material/shared/images/petermac.png" ror: "02a8bt934" pndb: + short_name: PNDB name: Pôle National de Données de Biodiversité url: https://www.pndb.fr/ 
avatar: "/training-material/shared/images/PNDB_sub.png" github: false psu: + short_name: PSU name: The Pennsylvania State University url: http://www.psu.edu avatar: "/training-material/shared/images/psu.png" @@ -239,20 +253,22 @@ psu: ror: "04p491231" qiime2: + short_name: QIIME2 name: QIIME2 url: https://qiime2.org/ avatar: https://avatars.githubusercontent.com/u/18176583?s=200&v=4 sanbi: - name: SANBI/UWC + short_name: SANBI/UWC url: https://www.sanbi.org/ avatar: "/training-material/shared/images/sanbi.png" - description: "South African National Bioinformatics Institute, University of the Western Cape" + name: "South African National Bioinformatics Institute, University of the Western Cape" github: false ror: "005r3tp02" san-raffaele-uni: + short_name: UniSR name: Università Vita-Salute San Raffaele url: "https://www.unisr.it/" avatar: "/training-material/shared/images/uni_san_raffaele.svg" @@ -267,18 +283,21 @@ sciensano: ror: "04ejags36" sfb992: + short_name: CRC992 name: Collaborative Research Centre 992 url: https://www.sfb992.uni-freiburg.de/ avatar: https://raw.githubusercontent.com/bgruening/presentations/bce348bb606c312d531c479e63a66efc2bc38d44/shared/resources/img/MEDEP.jpg github: false societyprotectionundergroundnetworks: + short_name: SPUN name: Society for the Protection of Underground Networks description: "SPUN is a scientific research organization founded to map mycorrhizal fungal communities and advocate for their protection." 
url: "https://www.spun.earth" avatar: "/training-material/shared/images/spun-logo.png" swiss-tph: + short_name: SwissTPH name: Swiss Tropical and Public Health Institute url: https://www.swisstph.ch/en/ avatar: "/training-material/shared/images/swiss-tph.png" @@ -286,12 +305,14 @@ swiss-tph: ror: "03adhka07" tb-capt: + short_name: TB-CAPT name: TB-CAPT url: https://www.tb-capt.org/ avatar: "/training-material/shared/images/tb_capt.svg" github: false uga: + short_name: UGA name: Université Grenoble-Alpes url: https://www.univ-grenoble-alpes.fr/ avatar: /training-material/shared/images/logo-uga.svg @@ -299,12 +320,14 @@ uga: ror: "02rx3b187" ukmrc: + short_name: UKRI-MRC name: UK Medical Research Council url: https://mrc.ukri.org avatar: https://raw.githubusercontent.com/nomadscientist/scrnaseq_training/master/logo.png github: false uni-freiburg: + short_name: UFR name: University of Freiburg url: https://www.uni-freiburg.de/ avatar: https://raw.githubusercontent.com/bgruening/presentations/a2e38e4b007994af798320db3a0131c4bb891c0e/shared/resources/img/logo_freiburg.jpg @@ -312,6 +335,7 @@ uni-freiburg: ror: "0245cg223" UTas: + short_name: UTas name: University of Tasmania url: https://www.utas.edu.au/ avatar: "/training-material/shared/images/UTas_logo.png" @@ -319,6 +343,7 @@ UTas: ror: "01nfmeh72" vib: + short_name: VIB name: Vlaams Instituut voor Biotechnologie url: https://vib.be/ avatar: "/training-material/shared/images/logo-vib.png" From eff839c28d79a1d3de4cb516d51d17b69a836805 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 16:05:02 +0200 Subject: [PATCH 15/41] update schemas --- bin/schema-grants.yaml | 6 ------ bin/schema-organisations.yaml | 25 +++++++++++++++++++------ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/bin/schema-grants.yaml b/bin/schema-grants.yaml index 3d5a2a37e928f5..d858d72d713594 100644 --- a/bin/schema-grants.yaml +++ b/bin/schema-grants.yaml @@ -93,12 +93,6 @@ mapping: - "2020-01-01" avatar: 
type: str - funder: - type: bool - description: Set this to true if this entity is a funding agency. - required: true - enum: - - true funding_id: type: str required: true diff --git a/bin/schema-organisations.yaml b/bin/schema-organisations.yaml index a06af6d24fd72e..074e2b7a4623de 100644 --- a/bin/schema-organisations.yaml +++ b/bin/schema-organisations.yaml @@ -10,17 +10,30 @@ mapping: description: | This ideally is your GitHub handle. If you do not have, or do not wish to provide a GitHub username, you may make up another identifier here, but then you must set `github: false` as described below. mapping: + short_name: + type: str + description: | + Name of the organisation, as short as possible (e.g. acronyms) + _examples: + - EBI + - UFR + - NIH + name: type: str required: true description: | - Your preferred name. If you prefer an alias or another name, this is welcome, it does not need to be your legal name. + Full name of the organisation (e.g. acronyms expanded). _examples: - - 张三 - - Alice - - Jane Doe - - Madame Tout-le-Monde - - Γιάννης Παπαδόπουλος + - European Bioinformatics Institute + - University of Freiburg + - National Institutes of Health + + description: + type: str + description: | + A couple sentences of description about the organisation + email: type: str pattern: /@/ From 611b1b7f594b7d4d6ebf95e0a2a8d6ca4737d88b Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 16:37:55 +0200 Subject: [PATCH 16/41] add some funding contributors as suggested by bgruening in #4913 --- .../tutorials/ewas-suite/tutorial.md | 21 +- .../tutorials/hicexplorer/tutorial.md | 18 +- .../fair/tutorials/bioimage-REMBI/tutorial.md | 33 +- .../tutorials/bioimage-metadata/tutorial.md | 15 +- .../tutorials/amr-gene-detection/tutorial.md | 29 +- .../bacterial-genome-annotation/tutorial.md | 48 +-- .../tutorials/funannotate/tutorial.md | 1 + .../tutorials/functional/tutorial.md | 2 + .../tutorials/genome-annotation/tutorial.md | 10 +- 
.../hela-screen-analysis/tutorial.md | 9 +- .../imaging-introduction/tutorial.md | 11 +- .../multiplex-tissue-imaging-TMA/tutorial.md | 80 ++--- .../tutorial.md | 9 +- .../imaging/tutorials/tutorial-CP/tutorial.md | 9 +- .../galaxy-intro-101-everyone/tutorial.md | 20 +- .../tutorial.md | 12 +- .../galaxy-intro-peaks2genes/tutorial.md | 4 + .../tutorials/galaxy-reproduce/tutorial.md | 9 +- .../tutorials/lcms-dataprocessing/tutorial.md | 312 +++++++++--------- .../tutorials/lcms-preprocessing/tutorial.md | 13 +- .../metabolomics/tutorials/lcms/tutorial.md | 17 +- .../msi-analyte-distribution/tutorial.md | 73 ++-- 22 files changed, 413 insertions(+), 342 deletions(-) diff --git a/topics/epigenetics/tutorials/ewas-suite/tutorial.md b/topics/epigenetics/tutorials/ewas-suite/tutorial.md index 0896265bf4e9f8..ca03203c1734dd 100644 --- a/topics/epigenetics/tutorials/ewas-suite/tutorial.md +++ b/topics/epigenetics/tutorials/ewas-suite/tutorial.md @@ -13,10 +13,15 @@ objectives: key_points: - "Infinium Human Methylation BeadChip is an array based technology to generate DNA methylation profiling at individual CpG loci in the human genome based on Illumina’s bead technology." - "Time and cost efficiency followed by high sample output, and overall quantitative accuracy and reproducibility made Infinium Human Methylation BeadChip one of the most widely used arrays on the market." -contributors: - - kkamieniecka - - khaled196 - - poterlowicz-lab +contributions: + authorship: + - kkamieniecka + - khaled196 + - poterlowicz-lab + editing: + testing: + funding: + - elixir-europe --- This tutorial is based on Hugo W, Shi H, Sun L, Piva M et al.: Non-genomic and Immune Evolution of Melanoma Acquiring MAPKi Resistance {% cite Hugo2015 %}. @@ -112,7 +117,7 @@ The first step of the Infinium Human Methylation BeadChip array analysis is raw > > > After exporting the reference genome from UCSC, we need to make sure that it is in the right dataset build. 
-> +> > Click on the **Differentially_Methylated_Positions.bed** output in your history to expand it. \ > Set the database build of your dataset to `Human Feb. 2009 (GRCh37/hg19) (hg19)`(if it is not set automatically) > @@ -170,7 +175,7 @@ The main goal of the **Infinium Human Methylation BeadChip** analysis is to simp > {: .hands_on} -> +> > How do we define phenotype covariate? > > > > Phenotype covariate is the set of observable characteristics of an individual resulting from the gene-environment interactions @@ -201,11 +206,11 @@ In addition to downstream analysis users can annotate the differentially methyla > - *"Input Type Gene ID"*: `SYMBOL` > - *"Output Type Gene ID"*: `ENTREZID` > -> 5. Run a GO Enrichment Analysis using {% tool [clusterProfiler go](toolshed.g2.bx.psu.edu/repos/kpbioteam/clusterprofiler_go/clusterprofiler_go/0.1.0) %} on the output of the {% tool [Cluster Profiler Bitr](toolshed.g2.bx.psu.edu/repos/kpbioteam/clusterprofiler_bitr/clusterprofiler_bitr/0.1.0) %} +> 5. Run a GO Enrichment Analysis using {% tool [clusterProfiler go](toolshed.g2.bx.psu.edu/repos/kpbioteam/clusterprofiler_go/clusterprofiler_go/0.1.0) %} on the output of the {% tool [Cluster Profiler Bitr](toolshed.g2.bx.psu.edu/repos/kpbioteam/clusterprofiler_bitr/clusterprofiler_bitr/0.1.0) %} {: .hands_on} ![Functional annotations](../../images/funcann.jpg "Results of GO enrichments analysis for DMPs") - + ID | Description | pvalue | qvalue | geneID | Count --- | --- | --- | --- | --- | --- GO:0048732 | gland development | 1.38E-58 | 4.23E-55 | PTGS2 / KCNC1 / FZD1 /SLC22A18 /SLC22A3 (...) 
| 372 diff --git a/topics/epigenetics/tutorials/hicexplorer/tutorial.md b/topics/epigenetics/tutorials/hicexplorer/tutorial.md index 25d6e88454cddf..8d5698671590e7 100644 --- a/topics/epigenetics/tutorials/hicexplorer/tutorial.md +++ b/topics/epigenetics/tutorials/hicexplorer/tutorial.md @@ -12,11 +12,19 @@ key_points: - "Hi-C helps to investigate the 3D structure of the genome and to uncover folding principles of chromatin." - "In order to build a Hi-C contact matrix the reads have to be mapped to the reference genome." - "Based on a contact matrix open and closed chromatin (A/B compartments) and topological associating domains (TADs) can be computed." -contributors: - - joachimwolff - - fidelram - - vivekbhr - - polkhe +contributions: + authorship: + - joachimwolff + - fidelram + - vivekbhr + - polkhe + editing: + testing: + funding: + - elixir-europe + - deNBI + - uni-freiburg + - sfb992 --- diff --git a/topics/fair/tutorials/bioimage-REMBI/tutorial.md b/topics/fair/tutorials/bioimage-REMBI/tutorial.md index c90bf4ccdbcbf0..ced6d8392faa32 100644 --- a/topics/fair/tutorials/bioimage-REMBI/tutorial.md +++ b/topics/fair/tutorials/bioimage-REMBI/tutorial.md @@ -13,7 +13,7 @@ objectives: - Find out what REMBI is and why it is useful - Categorise what metadata belongs to each of the submodules of REMBI - Gather the metadata for an example bioimage dataset - + time_estimation: "15m" key_points: @@ -23,7 +23,7 @@ tags: - fair - data management - bioimaging - + priority: 5 contributions: @@ -34,6 +34,7 @@ contributions: - poterlowicz-lab funding: - elixir-uk-dash + - elixir-europe subtopic: fair-data requirements: @@ -67,7 +68,7 @@ REMBI (Recommended Metadata for Biological Images) was proposed as a draft metad > In the [REMBI paper](https://www.nature.com/articles/s41592-021-01166-8), the authors consider three potential user groups who require different metadata. Find out what are these three groups and their metadata requirements. 
> > > -> > The identified three user groups are: Biologists, Imaging scientists, Computer-vision researchers. +> > The identified three user groups are: Biologists, Imaging scientists, Computer-vision researchers. > > - A research biologist may be interested in the biological sample that has been imaged to compare it to similar samples that they are working with. > > - An imaging scientist may be interested in how the image was acquired so they can improve upon current image acquisition techniques. > > - A computer vision researcher may be interested in annotated ground-truth segmentations, that can be obtained from the image, so they can develop faster and more accurate algorithms. @@ -78,11 +79,11 @@ REMBI (Recommended Metadata for Biological Images) was proposed as a draft metad > Instructor Note > > If you're an instructor leading this training, you might ask people to work in small groups for this exercise and encourage the discussion. Ask group members to share which of the user groups they identify as and what metadata they would want. -> +> {: .tip} -# Categories of metadata -REMBI covers different categories of metadata, such as: +# Categories of metadata +REMBI covers different categories of metadata, such as: - study - study component - biosample @@ -92,9 +93,9 @@ REMBI covers different categories of metadata, such as: - image correlation - analyzed data -Within each module, there are attributes that should be included to make the published data FAIR. We will explore all the modules and attributes suggested by REMBI and we'll show some examples as well. +Within each module, there are attributes that should be included to make the published data FAIR. We will explore all the modules and attributes suggested by REMBI and we'll show some examples as well. 
-## Study +## Study The first module of REMBI metadata describes the Study and should include: - Study type - Study description @@ -173,11 +174,11 @@ This should include all the information that relates to all the data in the proj A study component can be thought of as an experiment, both the physical experiment and subsequent data analysis, or a series of experiments that have been conducted with the same aim in mind. -The associated metadata should describe the imaging method used and include a description of the image dataset. The REMBI guidelines store high-level metadata in the study component and then divide the more detailed metadata into other modules. +The associated metadata should describe the imaging method used and include a description of the image dataset. The REMBI guidelines store high-level metadata in the study component and then divide the more detailed metadata into other modules. Within the Study component we include the Imaging Method which should describe the techniques used to acquire the raw data. This could be one or multiple methods, which should be part of a relevant ontology. For Confocal Microscopy data, we can use the Biological Imaging Methods Ontology, although it is also present in a number of other ontologies. -The description of the study component should include an overview of what was imaged as well as any processed data that is created during analysis. +The description of the study component should include an overview of what was imaged as well as any processed data that is created during analysis. > Example > @@ -195,7 +196,7 @@ The description of the study component should include an overview of what was im > Storing metadata > -> You could either choose to store the metadata in the same file as your study data or have a new file for each study component. This could be stored in the same place as your study metadata, or you could create a subdirectory structure. 
+> You could either choose to store the metadata in the same file as your study data or have a new file for each study component. This could be stored in the same place as your study metadata, or you could create a subdirectory structure. {: .tip} ## Biosample @@ -278,9 +279,9 @@ You can leave out some of the variables if they are not part of your experiment. ## Image acquisition Here you should include all the information about the instrument you used and how it was set up. Like with the specimen metadata, describe this information as though you are speaking to someone who already knows how to use a similar instrument. What would they need to know to produce the same image data? - + Check with your facility manager if they have any guidelines for what details need to be recorded for your particular instrument. Make sure that the parameters you record can actually be used by someone else if they don’t have exactly the same instrument or setup. For example, don’t say that you used a certain percentage of laser power, as this doesn’t tell you how much power was used unless you also provide the total power of the laser. If the instrument software has automatically generated a metadata file, remember to save this. Depending on its content, this may be sufficient. - + Start with the details of the equipment for the Instrument Attributes. If this is commercial equipment, include the make and model, a short description of what type of instrument it is and details about its configuration. If the instrument is bespoke, you will need to include more details. Next, you should include image acquisition parameters. These relate to how the instrument was set up for the particular experiment. Some of these may be captured automatically by the instrument’s software, so make things easy for yourself and check if a file is generated and what’s in it. If a file is generated, then you only need to manually record anything that is missing from the file. 
> Example @@ -318,8 +319,8 @@ Start with the details of the equipment for the Instrument Attributes. If this i > Helpful resources > -> To help you collect the information for your own data, you might have a look at the local resources from your institution or universities. For example, at Warwick University, there are [webpages](https://warwick.ac.uk/fac/sci/med/research/biomedical/facilities/camdu/methodsreporting/) describing the metadata that needs to be collected for some of the microscopes. -> +> To help you collect the information for your own data, you might have a look at the local resources from your institution or universities. For example, at Warwick University, there are [webpages](https://warwick.ac.uk/fac/sci/med/research/biomedical/facilities/camdu/methodsreporting/) describing the metadata that needs to be collected for some of the microscopes. +> {: .tip} ## Image data @@ -424,4 +425,4 @@ This section should not include metadata for any image data, including processed For more examples, check out REMBI Supplementary Information - either in [pdf](https://static-content.springer.com/esm/art%3A10.1038%2Fs41592-021-01166-8/MediaObjects/41592_2021_1166_MOESM1_ESM.pdf) or [spreadsheet](https://docs.google.com/spreadsheets/d/1Ck1NeLp-ZN4eMGdNYo2nV6KLEdSfN6oQBKnnWU6Npeo/edit#gid=1023506919). -At first glance, it might seem to be quite a stretch to collect all that metadata! But don’t get discouraged - following those guidelines will ensure better communication between the scientists and will make your research FAIR: Findable, Accessible, Interoperable, Reusable. During big data era when we are surrounded by so much resources, it’s crucial to get good data management habits, share them with others and hence contribute to the development of Science toghether. +At first glance, it might seem to be quite a stretch to collect all that metadata! 
But don’t get discouraged - following those guidelines will ensure better communication between the scientists and will make your research FAIR: Findable, Accessible, Interoperable, Reusable. In the era of big data, when we are surrounded by so many resources, it’s crucial to develop good data management habits, share them with others and hence contribute to the development of Science together. diff --git a/topics/fair/tutorials/bioimage-metadata/tutorial.md b/topics/fair/tutorials/bioimage-metadata/tutorial.md index 1fd87b7a0ba238..d67b976d9be946 100644 --- a/topics/fair/tutorials/bioimage-metadata/tutorial.md +++ b/topics/fair/tutorials/bioimage-metadata/tutorial.md @@ -25,7 +25,7 @@ tags: - fair - data management - bioimaging - + priority: 4 contributions: @@ -36,6 +36,7 @@ contributions: - poterlowicz-lab funding: - elixir-uk-dash + - elixir-europe subtopic: fair-data requirements: @@ -44,7 +45,7 @@ requirements: tutorials: - fair-intro - data-management - + follow_up_training: - @@ -55,7 +56,7 @@ follow_up_training: - type: "internal" topic_name: imaging - + --- # FAIR Bioimaging @@ -76,7 +77,7 @@ But the question remains: where can I submit my data? Currently the main reposit > - [IDR: Image Data Repository](https://idr.openmicroscopy.org/) > - [EMPIAR: Electron Microscopy Public Image Archive](https://www.ebi.ac.uk/empiar/) > - [BioImage Archive](https://www.ebi.ac.uk/bioimage-archive/) -> +> > Visit their websites and find out what their scope is or what sorts of datasets they accept. > > > @@ -91,7 +92,7 @@ But the question remains: where can I submit my data? Currently the main reposit > Repositories everywhere > > As well as these repositories, your Institute may have their own repository. For example, at the Warwick University, there is also [OMERO](https://warwick.ac.uk/fac/sci/med/research/biomedical/facilities/camdu/training/omero-warwick-guide_2.pdf) and [WRAP](https://wrap.warwick.ac.uk/).
-> +> {: .tip} @@ -117,7 +118,7 @@ Now we know what repositories are available, but how to decide which one is best > > 2. It is strongly recommended that submitters make their datasets available under [CC-BY](https://creativecommons.org/licenses/by/4.0/) license. > > 3. As specified on the [IDR website](https://idr.openmicroscopy.org/about/submission.html), dataset size is typically not an issue, but for sizes significantly larger than 1000 GB special planning may be needed. > > - **EMPIAR: Electron Microscopy Public Image Archive**: -> > 1. Provide image data in the formats in which they are uploaded, but recommended is the use of common formats in the field including MRC, MRCS, TIFF, DM4, IMAGIC, SPIDER, MRC FEI, RAW FEI and BIG DATA VIEWER HDF5. +> > 1. Provide image data in the formats in which they are uploaded, but recommended is the use of common formats in the field including MRC, MRCS, TIFF, DM4, IMAGIC, SPIDER, MRC FEI, RAW FEI and BIG DATA VIEWER HDF5. > > 2. All data in EMPIAR is freely and publicly available to the global community under the [CC0](https://creativecommons.org/share-your-work/public-domain/cc0/) license. > > 3. As specified on the [EMPIAR page](https://www.ebi.ac.uk/empiar/deposition/manual/#manIntro), typically having more than 4000 files in a directory has a tendency to slow down access considerably. It is recommended in this case to sub-divide the directory into subdirectories with no more than 4000 files each. If you have a single file larger than 1 TB, contact EMPAIR in advance. > > To find out more, check the [FAQ page](https://www.ebi.ac.uk/empiar/faq). @@ -128,7 +129,7 @@ Now we know what repositories are available, but how to decide which one is best > > - Less than 50 GB total size, less than 20GB per file – use submission tool > > - Up to 1TB total size – use FTP > > - Anything larger – use Aspera -> > +> > > > To find out more, check the [FAQ page](https://www.ebi.ac.uk/bioimage-archive/help-faq/). 
> {: .solution} > diff --git a/topics/genome-annotation/tutorials/amr-gene-detection/tutorial.md b/topics/genome-annotation/tutorials/amr-gene-detection/tutorial.md index fd77f245dcc2a7..37ccaf4a532fb0 100644 --- a/topics/genome-annotation/tutorials/amr-gene-detection/tutorial.md +++ b/topics/genome-annotation/tutorials/amr-gene-detection/tutorial.md @@ -43,6 +43,7 @@ contributions: funding: - avans-atgm - abromics + - elixir-europe follow_up_training: - type: internal topic_name: visualisation @@ -69,9 +70,9 @@ recordings: Antimicrobial resistance (AMR) is a global phenomenon with no geographical or species boundaries, which poses an important threat to human, animal and environmental health. It is a complex and growing problem that compromises our ability to treat bacterial infections. -AMR gene content can be assessed from whole genome sequencing to detect known resistance mechanisms and potentially identify novel mechanisms. +AMR gene content can be assessed from whole genome sequencing to detect known resistance mechanisms and potentially identify novel mechanisms. -To illustrate the process to identify AMR gene in a bacterial genome, we take an assembly of a bacterial genome (KUN1163 sample) generated by following a [bacterial genome assembly tutorial]({% link topics/assembly/tutorials/mrsa-illumina/tutorial.md %}) from data produced in "Complete Genome Sequences of Eight Methicillin-Resistant *Staphylococcus aureus* Strains Isolated from Patients in Japan" ({% cite Hikichi_2019 %}). +To illustrate the process to identify AMR gene in a bacterial genome, we take an assembly of a bacterial genome (KUN1163 sample) generated by following a [bacterial genome assembly tutorial]({% link topics/assembly/tutorials/mrsa-illumina/tutorial.md %}) from data produced in "Complete Genome Sequences of Eight Methicillin-Resistant *Staphylococcus aureus* Strains Isolated from Patients in Japan" ({% cite Hikichi_2019 %}). 
> Methicillin-resistant *Staphylococcus aureus* (MRSA) is a major pathogen > causing nosocomial infections, and the clinical manifestations of MRSA @@ -146,13 +147,13 @@ To identify AMR genes in contigs, tools like ABRicate or staramr ({% cite bharat * __Plasmid__: Plasmid types that were found for the isolate. * __Scheme__: The MLST scheme used - MLST stands for MultiLocus Sequence Typing. It is a technique for the typing of multiple loci, using DNA sequences of internal fragments of multiple housekeeping genes to characterize isolates of microbial species. + MLST stands for MultiLocus Sequence Typing. It is a technique for the typing of multiple loci, using DNA sequences of internal fragments of multiple housekeeping genes to characterize isolates of microbial species. Here, **starmr** uses [mlst](https://github.com/tseemann/mlst) to scan the contig files against traditional [PubMLST](https://pubmlst.org/) typing schemes. The correspondance between the scheme and the bacteria genus and species is accessible in the [map](https://github.com/tseemann/mlst/blob/master/db/scheme_species_map.tab) * __Sequence Type__: The sequence type that's assigned when combining all allele types * __Genome Length__: The isolate/genome file(s) genome length(s) - * __N50 value__: The isolate/genome file(s) N50 value(s) + * __N50 value__: The isolate/genome file(s) N50 value(s) * __Number of Contigs Greater Than Or Equal To 300 bp__: The number of contigs greater or equal to 300 base pair in the isolate/genome file(s) * __Quality Module Feedback__: The isolate/genome file(s) detailed feedback for the quality metrics @@ -174,7 +175,7 @@ To identify AMR genes in contigs, tools like ABRicate or staramr ({% cite bharat > > 1. There is one genome (1 line) > > 2. The genome has failed the quality (column 2), because the genome length is not within the acceptable length range (last column). > > 3. 
We can summarize starmr output and Table 1 in {% cite Hikichi_2019 %}: - > > + > > > > Antibiotic name | Abbreviation | staramr | {% cite Hikichi_2019 %} > > --- | --- | --- | --- > > Amikacin | | Yes | @@ -190,7 +191,7 @@ To identify AMR genes in contigs, tools like ABRicate or staramr ({% cite bharat > > Spectinomycin | | Yes | > > Tetracycline | | Yes | > > Tobramycin | | Yes | - > > + > > > > 4. The scheme is saureus, so *Staphylococcus aureus* (given the [scheme genus map](https://github.com/tseemann/mlst/blob/master/db/scheme_species_map.tab)), which is coherent with MRSA > {: .solution} > @@ -227,8 +228,8 @@ To identify AMR genes in contigs, tools like ABRicate or staramr ({% cite bharat > > Inspect the staramr output > > > > 1. There are: - > > - 1 MSLT - > > - 5 plasmid *rep* genes + > > - 1 MLST + > > - 5 plasmid *rep* genes > > - 7 resistance genes > > 2. The plasmid genes are on contig00019 (3 genes - coherent with {% cite Lozano_2012 %}), contig00024, and contig00002. > > 3. The resistance genes are: @@ -301,7 +302,7 @@ CARD can be very helpful to check all the resistance genes and check if it is lo # Visualization of the ARGs and plasmid genes in their genomic context -We would like to look at the ARGs and plasmid genes in their genomic context. To do that, we will usie [JBrowse](https://jbrowse.org/jb2/) ({% cite diesh2023jbrowse %}) with several information: +We would like to look at the ARGs and plasmid genes in their genomic context. To do that, we will use [JBrowse](https://jbrowse.org/jb2/) ({% cite diesh2023jbrowse %}) with several pieces of information: 1. Assembly as the reference 2. ARGs location @@ -330,13 +331,13 @@ The first step is to extract the location of the ARGs and plasmid genes on the c > {: .solution} {: .question} -This table can not be used directly in JBrowse. It first needs to be transformed in a standard format: GFF3, a file format used for describing genes and other features of DNA, RNA and protein sequences.
+This table can not be used directly in JBrowse. It first needs to be transformed in a standard format: GFF3, a file format used for describing genes and other features of DNA, RNA and protein sequences. > GFF3 file format -> +> > A GFF is a tab delimited file with 9 fields per line: -> 1. **seqid**: The name of the sequence where the feature is located. +> 1. **seqid**: The name of the sequence where the feature is located. > 2. **source**: The algorithm or procedure that generated the feature. This is typically the name of a software or database. > 3. **type**: The feature type name, like "gene" or "exon". In a well structured GFF file, all the children features always follow their parents in a single block (so all exons of a transcript are put after their parent "transcript" feature line and before any other parent transcript line). In GFF3, all features and their relationships should be compatible with the standards released by the Sequence Ontology Project. > 4. **start**: Genomic start of the feature, with a 1-base offset. This is in contrast with other 0-offset half-open sequence formats, like BED. @@ -344,7 +345,7 @@ This table can not be used directly in JBrowse. It first needs to be transformed > 6. **score**: Numeric value that generally indicates the confidence of the source in the annotated feature. A value of "." (a dot) is used to define a null value. > 7. **strand**: Single character that indicates the strand of the feature. This can be "+" (positive, or 5'->3'), "-", (negative, or 3'->5'), "." (undetermined), or "?" for features with relevant but unknown strands. > 8. **phase**: phase of CDS features; it can be either one of 0, 1, 2 (for CDS features) or "." (for everything else). See the section below for a detailed explanation. -> 9. **attributes**: A list of tag-value pairs separated by a semicolon with additional information about the feature. +> 9. 
**attributes**: A list of tag-value pairs separated by a semicolon with additional information about the feature. {: .comment} > Create a GFF file @@ -391,7 +392,7 @@ In addition to the ARGs and plasmid genes, it would be good to have extra inform
-**Bakta** is a tool for the rapid & standardized annotation of bacterial genomes and plasmids from both isolates and MAGs. +**Bakta** is a tool for the rapid & standardized annotation of bacterial genomes and plasmids from both isolates and MAGs. > Annotate the contigs > diff --git a/topics/genome-annotation/tutorials/bacterial-genome-annotation/tutorial.md b/topics/genome-annotation/tutorials/bacterial-genome-annotation/tutorial.md index 4570d4324dc994..9585e2649ca1d6 100644 --- a/topics/genome-annotation/tutorials/bacterial-genome-annotation/tutorial.md +++ b/topics/genome-annotation/tutorials/bacterial-genome-annotation/tutorial.md @@ -37,6 +37,8 @@ contributions: - bebatut funding: - abromics + - elixir-europe + - ifb follow_up_training: - type: internal topic_name: genome-annotation @@ -66,11 +68,11 @@ recordings: --- -After sequencing and assembly, a genome can be annotated. It is an essential step to describe the genome. +After sequencing and assembly, a genome can be annotated. It is an essential step to describe the genome. Genome annotation consists in describing the structure and function of the components of the genome, by predicting, analyzing, and interpreting them in order to extract their biological significance and understand the biological processes in which they participate. Among other things, it identifies the locations of genes and all the coding regions in a genome (*structural annotation*) and determines what those genes do (*functional annotation*). -To illustrate the process to annotate a bacterial genome, we take an assembly of a bacterial genome (KUN1163 sample) generated by following a [bacterial genome assembly tutorial]({% link topics/assembly/tutorials/mrsa-illumina/tutorial.md %}) from data produced in "Complete Genome Sequences of Eight Methicillin-Resistant *Staphylococcus aureus* Strains Isolated from Patients in Japan" ({% cite Hikichi_2019 %}). 
+To illustrate the process to annotate a bacterial genome, we take an assembly of a bacterial genome (KUN1163 sample) generated by following a [bacterial genome assembly tutorial]({% link topics/assembly/tutorials/mrsa-illumina/tutorial.md %}) from data produced in "Complete Genome Sequences of Eight Methicillin-Resistant *Staphylococcus aureus* Strains Isolated from Patients in Japan" ({% cite Hikichi_2019 %}). > Methicillin-resistant *Staphylococcus aureus* (MRSA) is a major pathogen > causing nosocomial infections, and the clinical manifestations of MRSA @@ -119,11 +121,11 @@ Any analysis should get its own Galaxy history. So let's start by creating a new For annotating the contigs, several tools exists to do that: Prokka ({% cite seemann2014prokka %}), Bakta ({% cite schwengers2021bakta %}), etc. Here, we use **Bakta** as recommended by {% include _includes/contributor-badge.html id="tseemann" %} as the successor of **Prokka**. -**Bakta** is a tool for the rapid & standardized annotation of bacterial genomes and plasmids from both isolates and metagenome-assembled genomes (MAGs). It implements a comprehensive annotation workflow for coding and non-coding genes (*i.e.* tRNA, rRNA). +**Bakta** is a tool for the rapid & standardized annotation of bacterial genomes and plasmids from both isolates and metagenome-assembled genomes (MAGs). It implements a comprehensive annotation workflow for coding and non-coding genes (*i.e.* tRNA, rRNA). ![Flow diagram of Bakta. It depicts the sequential steps and connections involved in its functioning.](./images/bakta_wf.png "Overview of the Bakta annotation workflow ({% cite schwengers2021bakta %})") -It is also able to detect and annotate small proteins (sORF). Predicted CDS are annotated using an alignment-free protein sequence identification approach with cross-references to public databases via stable identifiers. +It is also able to detect and annotate small proteins (sORF). 
Predicted CDS are annotated using an alignment-free protein sequence identification approach with cross-references to public databases via stable identifiers. > Contig annotation > @@ -135,7 +137,7 @@ It is also able to detect and annotate small proteins (sORF). Predicted CDS are > - In *"Optional annotation"*: > - *"Keep original contig header"*: `Yes` > - In *"Selection of the output files"*: -> - *"Output files selection"*: +> - *"Output files selection"*: > - `Annotation file in TSV` > - `Annotation and sequence in GFF3` > - `Feature nucleotide sequences as FASTA` @@ -153,7 +155,7 @@ It is also able to detect and annotate small proteins (sORF). Predicted CDS are > 2. How long is the draft genome? > 3. How many CDSs have been found? > 4. How many small proteins? - > 5. Which other components have been found? + > 5. Which other components have been found? > > How does it compare to results for KUN1163 in [Table 1](https://journals.asm.org/doi/10.1128/mra.01212-19#tab1) in {% cite Hikichi_2019 %}? > @@ -164,9 +166,9 @@ It is also able to detect and annotate small proteins (sORF). Predicted CDS are > > 3. 2,717 CDSs, a bit more than the expected 2,704 CDSs in [Table 1](https://journals.asm.org/doi/10.1128/mra.01212-19#tab1) in {% cite Hikichi_2019 %} > > 4. 5 sORFs. There is no information about sORFs in {% cite Hikichi_2019 %} > > 5. Other components - > > + > > > > Components | Bakta | {% cite Hikichi_2019 %} - > > --- | --- | --- + > > --- | --- | --- > > tRNAs | 57 | 61 > > Transfer-messenger RNA (tmRNAs) | 1 | 1 > > rRNAs | 9 | 5 @@ -224,7 +226,7 @@ It is also able to detect and annotate small proteins (sORF). Predicted CDS are GFF is a file format used for describing genes and other features of DNA, RNA and protein sequences. It is a tab delimited file with 9 fields per line: - 1. **seqid**: The name of the sequence where the feature is located. + 1. **seqid**: The name of the sequence where the feature is located. 2. 
**source**: The algorithm or procedure that generated the feature. This is typically the name of a software or database. 3. **type**: The feature type name, like "gene" or "exon". In a well structured GFF file, all the children features always follow their parents in a single block (so all exons of a transcript are put after their parent "transcript" feature line and before any other parent transcript line). In GFF3, all features and their relationships should be compatible with the standards released by the Sequence Ontology Project. 4. **start**: Genomic start of the feature, with a 1-base offset. This is in contrast with other 0-offset half-open sequence formats, like BED. @@ -232,7 +234,7 @@ It is also able to detect and annotate small proteins (sORF). Predicted CDS are 6. **score**: Numeric value that generally indicates the confidence of the source in the annotated feature. A value of "." (a dot) is used to define a null value. 7. **strand**: Single character that indicates the strand of the feature. This can be "+" (positive, or 5'->3'), "-", (negative, or 3'->5'), "." (undetermined), or "?" for features with relevant but unknown strands. 8. **phase**: phase of CDS features; it can be either one of 0, 1, 2 (for CDS features) or "." (for everything else). See the section below for a detailed explanation. - 9. **attributes**: A list of tag-value pairs separated by a semicolon with additional information about the feature. + 9. **attributes**: A list of tag-value pairs separated by a semicolon with additional information about the feature. > > @@ -246,12 +248,12 @@ It is also able to detect and annotate small proteins (sORF). Predicted CDS are {: .question} - Plot of the annotation as circular genome annotation - + ![A circular plot showcasing the draft genome, providing a visual representation of its genetic information.](./images/bakta_plot.png) > > - > 1. What the 2 rings in the center? + > 1. What are the 2 rings in the center? > 2.
How are plotted the features? > > > @@ -282,7 +284,7 @@ To identify plasmids in our contigs, we use **PlasmidFinder** ({% cite carattoli - `raw_results.txt`: A text file containing the result table and alignments - `results.tsv`: A tabular file with the following columns: - + - **Database** - **Plasmid**: Plasmid against which the input genome has been aligned. - **Identity**: Percent identity in the alignment between the best matching plasmid in the database and the corresponding sequence in the inputgenome (also called the high-scoring segment pair (HSP)). A perfect alignment is 100%, but must also cover the entire length of the plasmid in the database (compare example 1 and 3). @@ -307,7 +309,7 @@ To identify plasmids in our contigs, we use **PlasmidFinder** ({% cite carattoli > > - CP000737, AP003139 (2 times) correspond to *Staphylococcus aureus* plasmids > > - AF503772 corresponds to a *Enterococcus faecalis* plasmid > > - CP003584 corresponds to a *Enterococcus faecium* plasmid - > > + > > > > 4. All plasmid sequences corresponding to *Staphylococcus aureus* plasmids are all on contig00019, making this contig likely a plasmid. In addition, this contig has a length of 30,347 bp, which is similar to the expected length of the plasmid for KUN1163 in [Table 1](https://journals.asm.org/doi/10.1128/mra.01212-19#tab1) in {% cite Hikichi_2019 %} > {: .solution} {: .question} @@ -335,7 +337,7 @@ To detect integrons, we will use **IntegronFinder** ({% cite neron2022integronfi - Complete integron: Integron with integron integrase nearby attC site(s) - In0 element: Integron integrase only, without any attC site nearby - - CALIN element: Cluster of attC sites Lacking INtegrase nearby + - CALIN element: Cluster of attC sites Lacking INtegrase nearby > Integron identification > @@ -392,7 +394,7 @@ To detect IS elements, we will use **ISEScan** ({% cite xie2017isescan %}). **IS > > > > 1. 20 > > 2. 
Using {% tool [Group data by a column](Grouping1) %} to group and count on 1st column, we find: - > > + > > > > Contig | IS element number > > --- | --- > > contig00001 | 2 @@ -408,17 +410,17 @@ To detect IS elements, we will use **ISEScan** ({% cite xie2017isescan %}). **IS > > contig00019 | 3 > > contig00027 | 1 > > contig00032 | 1 - > > contig00037 | 1 + > > contig00037 | 1 > > > > 3. As for previous question, when grouping and counting on 2nd column, we find 5 IS families: - > > + > > > > IS families | Identified IS elements > > --- | --- > > IS1182 | 4 > > IS21 | 7 > > IS3 | 3 > > IS6 | 5 - > > ISL3 | 1 + > > ISL3 | 1 > > > {: .solution} {: .question} @@ -431,7 +433,7 @@ To detect IS elements, we will use **ISEScan** ({% cite xie2017isescan %}). **IS # Visualisation of the annotation -We would like to look at the annotation using [JBrowse](https://jbrowse.org/jb2/) ({% cite diesh2023jbrowse %}) with several information: +We would like to look at the annotation using [JBrowse](https://jbrowse.org/jb2/) ({% cite diesh2023jbrowse %}) with several information: 1. Annotations identified by **Bakta** 2. Plasmid sequences identified by **PlasmidFinder** @@ -492,11 +494,11 @@ We would like to look at the annotation using [JBrowse](https://jbrowse.org/jb2/ > Transform IntegronFinder output to GFF if integrons found > **IntegronFinder** tabular output can be transformed to GFF by: -> +> > 1. Replace `NA` values on column 7 by `0` > 2. Remove the first two lines > 3. Transform to GFF3 -> +> > > Transform IntegronFinder to GFF > > 1. {% tool [Replace Text in a specific column](toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/9.3+galaxy1) %} with the following parameters: > > - {% icon param-file %} *"File to process"*: tabular output of **IntegronFinder** @@ -596,7 +598,7 @@ If it takes too long to build the JBrowse instance, you can view an embedded one > > > > > -> > 1. 
Yes all sequences in the **PlasmidFinder** track are also in the **Bakta** track. For +> > 1. Yes all sequences in the **PlasmidFinder** track are also in the **Bakta** track. For > > 2. All Insertion Sequences in the **ISEScan** track are also in the **Bakta** track, but the Terminanl Inverted repeats are not in the **Bakta** track > {: .solution} {: .question} diff --git a/topics/genome-annotation/tutorials/funannotate/tutorial.md b/topics/genome-annotation/tutorials/funannotate/tutorial.md index fb5462249d12b5..ab6fa724b8d955 100644 --- a/topics/genome-annotation/tutorials/funannotate/tutorial.md +++ b/topics/genome-annotation/tutorials/funannotate/tutorial.md @@ -36,6 +36,7 @@ contributions: funding: - gallantries - eurosciencegateway + - elixir-europe abbreviations: NMDS: Non-metric multidimensional scaling diff --git a/topics/genome-annotation/tutorials/functional/tutorial.md b/topics/genome-annotation/tutorials/functional/tutorial.md index 2cf6c7bc3d7bd5..3fdadbaadb9977 100644 --- a/topics/genome-annotation/tutorials/functional/tutorial.md +++ b/topics/genome-annotation/tutorials/functional/tutorial.md @@ -20,6 +20,8 @@ contributions: funding: - gallantries - eurosciencegateway + - elixir-europe + - ifb subtopic: eukaryote priority: 6 --- diff --git a/topics/genome-annotation/tutorials/genome-annotation/tutorial.md b/topics/genome-annotation/tutorials/genome-annotation/tutorial.md index 275a7d0882fd80..a19eac923b1597 100644 --- a/topics/genome-annotation/tutorials/genome-annotation/tutorial.md +++ b/topics/genome-annotation/tutorials/genome-annotation/tutorial.md @@ -11,9 +11,13 @@ objectives: time_estimation: "2H" level: Introductory key_points: -contributors: - - erxleben - - bgruening +contributions: + authorship: + - erxleben + - bgruening + funding: + - eurosciencegateway + - elixir-europe subtopic: prokaryote --- diff --git a/topics/imaging/tutorials/hela-screen-analysis/tutorial.md b/topics/imaging/tutorials/hela-screen-analysis/tutorial.md index 
521803cb12e3d9..55b9fdcf9661cd 100644 --- a/topics/imaging/tutorials/hela-screen-analysis/tutorial.md +++ b/topics/imaging/tutorials/hela-screen-analysis/tutorial.md @@ -34,9 +34,12 @@ follow_up_training: tutorials: - machinelearning time_estimation: "1H" -contributors: - - thomaswollmann - - kostrykin +contributions: + authorship: + - thomaswollmann + - kostrykin + funding: + - elixir-europe tags: - HeLa diff --git a/topics/imaging/tutorials/imaging-introduction/tutorial.md b/topics/imaging/tutorials/imaging-introduction/tutorial.md index 9e9837e8b31fca..68b84a17d01001 100644 --- a/topics/imaging/tutorials/imaging-introduction/tutorial.md +++ b/topics/imaging/tutorials/imaging-introduction/tutorial.md @@ -24,10 +24,13 @@ follow_up_training: topic_name: imaging tutorials: - hela-screen-analysis -contributors: - - thomaswollmann - - shiltemann - - kostrykin +contributions: + authorship: + - thomaswollmann + - shiltemann + - kostrykin + funding: + - elixir-europe tags: - HeLa diff --git a/topics/imaging/tutorials/multiplex-tissue-imaging-TMA/tutorial.md b/topics/imaging/tutorials/multiplex-tissue-imaging-TMA/tutorial.md index ea2ee661b59307..6d72adafef7d53 100644 --- a/topics/imaging/tutorials/multiplex-tissue-imaging-TMA/tutorial.md +++ b/topics/imaging/tutorials/multiplex-tissue-imaging-TMA/tutorial.md @@ -8,7 +8,7 @@ questions: - What tools are available for downstream analysis of multiplex tissue images in Galaxy? - How do I pre-process and analyze Tissue Microarray data? - How can I visualize multiplex tissue images and associated data? -- How can I assign phenotypes to cells in an MTI dataset? +- How can I assign phenotypes to cells in an MTI dataset? 
objectives: - Understand the tools available in Galaxy for multiplex tissue imaging analysis - Analyze and visualize publicly available TMA data using Galaxy @@ -19,10 +19,10 @@ key_points: - There are powerful interactive visualization tools available in Galaxy that can combine the real images with associated data - Tissue Microarray data can be analyzed using workflows that invoke MTI tools in batch - Segmentation quality can vary significantly depending on features of the input image, tool used, and parameters -contributors: -- CameronFRWatson -- alliecreason - +contributions: + authorship: + - CameronFRWatson + - alliecreason --- @@ -77,8 +77,8 @@ Multiplex tissue images come in a variety of forms and file-types depending on t {: .hands_on} > **Imaging platform differences** -> -> The Exemplar-002 raw images are in *ome.tiff* format; however, commonly seen raw file-types are *ome.tiff*, *tiff*, *czi*, and *svs*. If your input images are not *ome.tiff* or *tiff*, you may have to edit the dataset attributes in Galaxy to allow tools to recognize them as viable inputs. +> +> The Exemplar-002 raw images are in *ome.tiff* format; however, commonly seen raw file-types are *ome.tiff*, *tiff*, *czi*, and *svs*. If your input images are not *ome.tiff* or *tiff*, you may have to edit the dataset attributes in Galaxy to allow tools to recognize them as viable inputs. > {: .warning} @@ -91,7 +91,7 @@ The raw files for each round (10 in total) of the exemplar-002 data are availabl Commonly, raw MTI data will consist of one image per round of imaging. These individual round images are frequently captured in tiles, and there can be slight variations in how each tile was illuminated across the course of imaging. Prior to tile stitching and image registration, the tiles have to undergo illumination correction with **BaSiC Illumination** ({% cite Peng2017 %}) to account for this. 
Unlike many of the other tools in this workflow, BaSiC has no extra parameters to think about: Just input the collection of raw images and press *go*! -Two new list collections will appear in the history upon completion: +Two new list collections will appear in the history upon completion: - BaSiC Illumination on Collection `X`: FFP (flat-field) - BaSiC Illumination on Collection `X`: DFP (deep-field) @@ -107,11 +107,11 @@ Two new list collections will appear in the history upon completion: # Stitching and registration with **ASHLAR** -After illumination is corrected across round tiles, the tiles must be stitched together, and subsequently, each round mosaic must be registered together into a single pyramidal OME-TIFF file. **ASHLAR** ({% cite Muhlich2022 %}) from MCMICRO provides both of these functions. +After illumination is corrected across round tiles, the tiles must be stitched together, and subsequently, each round mosaic must be registered together into a single pyramidal OME-TIFF file. **ASHLAR** ({% cite Muhlich2022 %}) from MCMICRO provides both of these functions. > Important detail: Marker File > -> **ASHLAR** optionally reads a marker metadata file to name the channels in the output OME-TIFF image. This marker file will also be used in later steps. Make sure that the marker file is comma-separated and has the `marker_names` as the third column (Figure 3.). +> **ASHLAR** optionally reads a marker metadata file to name the channels in the output OME-TIFF image. This marker file will also be used in later steps. Make sure that the marker file is comma-separated and has the `marker_names` as the third column (Figure 3.). > > ![screenshot of the markers table](../../images/multiplex-tissue-imaging-TMA/ex2_markersFile.png "Markers file, used both in ASHLAR and downstream steps. 
Critically, the marker_names are in the third column.") > @@ -131,16 +131,16 @@ After illumination is corrected across round tiles, the tiles must be stitched t > {: .hands_on} -> **Imaging platform differences** -> -> ASHLAR, among other tools in the MCMICRO and Galaxy-ME pre-processing tools have some parameters that are specific to the +> **Imaging platform differences** +> +> ASHLAR, among other tools in the MCMICRO and Galaxy-ME pre-processing tools have some parameters that are specific to the > imaging patform used. By default, ASHLAR is oriented to work with images from RareCyte scanners. AxioScan scanners render images > in a different orientation. Because of this, when using ASHLAR on AxioScan images, it is important to select the **Flip Y-Axis** > parameter to *Yes* -> +> > ASHLAR will work for most imaging modalities; however, certain modalities require different tools to be registered. For example, -> multiplex immunohistochemistry (mIHC) images must use an aligner that registers each moving image to a reference Hematoxylin image. -> For this, Galaxy-ME includes the alternative registration tool {% tool **PALOM** %}. +> multiplex immunohistochemistry (mIHC) images must use an aligner that registers each moving image to a reference Hematoxylin image. +> For this, Galaxy-ME includes the alternative registration tool {% tool **PALOM** %}. > {: .warning} @@ -169,7 +169,7 @@ UNetCoreograph will output images (used for downstream steps), masks, and a prev # Nuclear segmentation with **Mesmer** -Cell segmentation is the basis for all downstream single-cell analyses. Different segmentation tools work highly variably depending on the imaging modality or platform used. Because of this, Galaxy-ME has incorporated several cell segmentation tools so users may find the tool that works optimally for their data. +Cell segmentation is the basis for all downstream single-cell analyses. 
Different segmentation tools work highly variably depending on the imaging modality or platform used. Because of this, Galaxy-ME has incorporated several cell segmentation tools so users may find the tool that works optimally for their data. Available segmentation tools in Galaxy-ME: @@ -178,11 +178,11 @@ Available segmentation tools in Galaxy-ME: - Cellpose ({% cite Stringer2020 %}) - ilastik ({% cite Berg2019 %}) -In this tutorial, we use **Mesmer** because it tends to perform generally well on a diverse range of image types, and has a limited number of parameters to understand. +In this tutorial, we use **Mesmer** because it tends to perform generally well on a diverse range of image types, and has a limited number of parameters to understand. > Important detail: Running images in batches > -> Now that each image has been split into individual core images, downstream tools must be run on the images separately. Luckily, Galaxy makes this easy by including the option to run each tool in batch across a collection of inputs. Next to the input for the tool, select {% icon param-collection %} (**Dataset collection**) as the input type, and pass the collection output by UNetCoreograph as input. +> Now that each image has been split into individual core images, downstream tools must be run on the images separately. Luckily, Galaxy makes this easy by including the option to run each tool in batch across a collection of inputs. Next to the input for the tool, select {% icon param-collection %} (**Dataset collection**) as the input type, and pass the collection output by UNetCoreograph as input. 
> {: .comment} @@ -193,7 +193,7 @@ In this tutorial, we use **Mesmer** because it tends to perform generally well o > - *"Resolution of the image in microns-per-pixel"*: `0.65` > - *"Compartment for segmentation prediction:"*: `Nuclear` > -> > np.squeeze +> > np.squeeze > > > > The **np.squeeze** parameter is very important to select as `Yes` to make the output compatible with next steps > {: .comment} @@ -201,19 +201,19 @@ In this tutorial, we use **Mesmer** because it tends to perform generally well o {: .hands_on} > Imaging platform differences: Image resolution** -> -> A crucial parameter for Mesmer and other segmentation tools is the **Image resolution**. This is reported in microns/pixel, and can vary depending on the imaging platform used and the settings at image acquisition. Mesmer accepts the resolution in microns/pixel; however, if using UnMICST, the resolution must be reported as a ratio of the resolution of UnMICST's training images (0.65). For example, when using UnMICST, if your images were captured at a resolution of 0.65, then the UnMICST value would be 1, but if your images were captured at 0.325 microns/pixel, then the value you would enter for UnMICST would be 0.5. +> +> A crucial parameter for Mesmer and other segmentation tools is the **Image resolution**. This is reported in microns/pixel, and can vary depending on the imaging platform used and the settings at image acquisition. Mesmer accepts the resolution in microns/pixel; however, if using UnMICST, the resolution must be reported as a ratio of the resolution of UnMICST's training images (0.65). For example, when using UnMICST, if your images were captured at a resolution of 0.65, then the UnMICST value would be 1, but if your images were captured at 0.325 microns/pixel, then the value you would enter for UnMICST would be 0.5. 
> {: .warning} # Calculate single-cell features with **Quantification** -After generating a segmentation mask, the mask and the original registered image can be used to extract mean intensities for each marker in the panel, spatial coordinates, and morphological features for every cell. This step is performed by MCMICRO's **Quantification** module. +After generating a segmentation mask, the mask and the original registered image can be used to extract mean intensities for each marker in the panel, spatial coordinates, and morphological features for every cell. This step is performed by MCMICRO's **Quantification** module. -Once again, as this is a TMA, we will be running this in batch mode for every core image and its segmentation mask. +Once again, as this is a TMA, we will be running this in batch mode for every core image and its segmentation mask. -The quantification step will produce a CSV cell feature table for every image in the batch. +The quantification step will produce a CSV cell feature table for every image in the batch. > Quantification > @@ -224,7 +224,7 @@ The quantification step will produce a CSV cell feature table for every image in > - {% icon param-collection %} *"Additional Cell Masks "*: `Nothing Selected` (Other tools may produce multiple mask types) > - {% icon param-file %} *"Marker channels"*: Comma-separated markers file with marker_names in third column > -> > Mask metrics and Intensity metrics +> > Mask metrics and Intensity metrics > > > > Leaving the *"mask metrics"* and *"intensity metrics"* blank will by default run all available metrics > > @@ -235,7 +235,7 @@ The quantification step will produce a CSV cell feature table for every image in # **Convert McMicro Output to Anndata** -Anndata ({% cite Virshup2021 %}) is a Python package and file format schema for working with annotated data matrices that has gained popularity in the single-cell analysis community. 
Many downstream analysis tools, including Scimap from MCMICRO, Scanpy ({% cite Wolf2018 %}), and Squidpy ({% cite Palla2022 %}) are built around anndata format files (h5ad). This tool splits the marker intensity data into a separate dataframe (`X`), and places all observational data (spatial coordinates, morphological features, etc.) in the cell feature table into a separate dataframe (`obs`) that shares the same indices as `X`. In downstream analyses, new categorical variables, such as phenotype assignments for each cell, are stored in the `obs` dataframe. +Anndata ({% cite Virshup2021 %}) is a Python package and file format schema for working with annotated data matrices that has gained popularity in the single-cell analysis community. Many downstream analysis tools, including Scimap from MCMICRO, Scanpy ({% cite Wolf2018 %}), and Squidpy ({% cite Palla2022 %}) are built around anndata format files (h5ad). This tool splits the marker intensity data into a separate dataframe (`X`), and places all observational data (spatial coordinates, morphological features, etc.) in the cell feature table into a separate dataframe (`obs`) that shares the same indices as `X`. In downstream analyses, new categorical variables, such as phenotype assignments for each cell, are stored in the `obs` dataframe. Learn more about this file format at the [anndata documentation](https://anndata.readthedocs.io/en/latest/index.html). @@ -248,9 +248,9 @@ Learn more about this file format at the [anndata documentation](https://anndata > - *"Whether to remove the DNA channels from the final output"*: `No` > - *"Whether to use unique name for cells/rows"*: `No` > -> > Important parameter: Unique names for cells/rows +> > Important parameter: Unique names for cells/rows > > -> > Setting *"Whether to use unique name for cells/rows"* to `No` to ensures that downstream interactive visualizations will be able to map observational features to the mask CellIDs. 
+> > Setting *"Whether to use unique name for cells/rows"* to `No` ensures that downstream interactive visualizations will be able to map observational features to the mask CellIDs. > {: .warning} > {: .hands_on} @@ -275,10 +275,10 @@ There are several ways to classify cells available in Galaxy-ME. Unsupervised ap > > > Limitations of GMM automated phenotyping > > -> > When manual gates are not provided, Scimap fits a GMM to determine a threshold between positive and negative cells. This automated gating works well when markers are highly abundant within the tissue, and the data shows a bimodal distribution (Figure 6A.). GMM gating can lead to spurious thresholds, however, when the data does not appear to be bimodal (Figure 6B.). This tends to happen when the marker is not highly abundant in the tissue, so there isn't a large positive population. Markers that have a highly continuous range of intensity, like certain functional markers, can also be problematic with GMM gating. It is recommended to always look at the GMM plots output by Scimap, and validate any potentially spurious gates manually. +> > When manual gates are not provided, Scimap fits a GMM to determine a threshold between positive and negative cells. This automated gating works well when markers are highly abundant within the tissue, and the data shows a bimodal distribution (Figure 6A.). GMM gating can lead to spurious thresholds, however, when the data does not appear to be bimodal (Figure 6B.). This tends to happen when the marker is not highly abundant in the tissue, so there isn't a large positive population. Markers that have a highly continuous range of intensity, like certain functional markers, can also be problematic with GMM gating. It is recommended to always look at the GMM plots output by Scimap, and validate any potentially spurious gates manually. > > > > ![Two bar plots with overlain curves.
Left in A shows a bimodal distribution of CD3D, right in B shows a unimodal distribution in CD11B.](../../images/multiplex-tissue-imaging-TMA/ex2_example_GMMs.png "Scimap automatic gating GMMs for two markers. (A) An example of a marker with a bimodal distribution and a reasonable looking gate. (B) An example of a marker with a unimodal distribution that is not ideal for fitting with a GMM, and would be a candidate for manual validation and gating.") -> > +> > > {: .comment} > {: .hands_on} @@ -286,15 +286,15 @@ There are several ways to classify cells available in Galaxy-ME. Unsupervised ap # Interactive visualization of multiplex tissue images -Visual analysis is an important part of multiplex tissue imaging workflows. Galaxy-ME has several tools that make interactive visualization easy, and can be used at various stages of analysis. +Visual analysis is an important part of multiplex tissue imaging workflows. Galaxy-ME has several tools that make interactive visualization easy, and can be used at various stages of analysis. ## Converting UNetCoreograph images to OME-TIFF using the **Convert image** tool -UNetCoreograph outputs each individual core image in `tiff` format. Interactive visualization tools, such as **Vitessce** and **Avivator** require the images to be in `OME-TIFF` format to be viewed. Galaxy-ME includes a conversion tool that can accomodate this, along with many other useful conversion functions. +UNetCoreograph outputs each individual core image in `tiff` format. Interactive visualization tools, such as **Vitessce** and **Avivator** require the images to be in `OME-TIFF` format to be viewed. Galaxy-ME includes a conversion tool that can accommodate this, along with many other useful conversion functions. > Convert image > -> 1. {% tool [Convert image](toolshed.g2.bx.psu.edu/repos/imgteam/bfconvert/ip_convertimage/6.7.0+galaxy0) %} with the following parameters: +> 1.
{% tool [Convert image](toolshed.g2.bx.psu.edu/repos/imgteam/bfconvert/ip_convertimage/6.7.0+galaxy0) %} with the following parameters: > - {% icon param-collection %} *"Input Image"*: `UNetCoreograph Images` > - *"Output data type"*: `OME TIFF` > - *"Tile image"*: `Tile image` @@ -305,7 +305,7 @@ UNetCoreograph outputs each individual core image in `tiff` format. Interactive ## **Rename OME-TIFF Channels** -Some tools can cause the channel names in an OME-TIFF image to be lost. To fix this, or to change the channel names to whatever the user prefers, the **Rename OME-TIFF Channels** tool can be invoked using a markers file similar to the one used in previous steps. +Some tools can cause the channel names in an OME-TIFF image to be lost. To fix this, or to change the channel names to whatever the user prefers, the **Rename OME-TIFF Channels** tool can be invoked using a markers file similar to the one used in previous steps. > Rename channels > @@ -332,7 +332,7 @@ For any `OME-TIFF` image in a Galaxy-ME history, there will be an option to view ## Generating an interactive visualization dashboard with **Vitessce** -**Vitessce** is a powerful visualization tool that creates interactive dashboards (Figure 8.) to look at a multiplex `OME-TIFF` images in conjunction with data generated during analysis and stored in an anndata file. The segmentation mask can be overlaid onto the image to qualitatively assess the segmentation performance. The mask can then be colored with associated observational data (Figure 9A.), such as `phenotype`, with the same colors appearing in barplots (Figure 9B.), UMAP representations, heatmaps, and marker intensity violin plots for comrehensive data exploration. +**Vitessce** is a powerful visualization tool that creates interactive dashboards (Figure 8.) to look at a multiplex `OME-TIFF` images in conjunction with data generated during analysis and stored in an anndata file. 
The segmentation mask can be overlaid onto the image to qualitatively assess the segmentation performance. The mask can then be colored with associated observational data (Figure 9A.), such as `phenotype`, with the same colors appearing in barplots (Figure 9B.), UMAP representations, heatmaps, and marker intensity violin plots for comprehensive data exploration. ![Screenshot of the vitessce dashboard.](../../images/multiplex-tissue-imaging-TMA/ex2_fullVitessce.png "A Full view of a vitesse dashboard for one core from Exemplar-002.") @@ -354,7 +354,7 @@ For any `OME-TIFF` image in a Galaxy-ME history, there will be an option to view # Next steps: Compositional and spatial analyses -Galaxy-ME includes additional tools from **Scimap** and tools from the **Squidpy** package ({% cite Palla2022 %}) that can be used to perform a variety of downstream analyses. For example, once phenotypes have been assigned to individual cells, **Squidpy** has several methods for understanding the spatial organization of the tissue. Using **Squidpy**, a spatial neighborhood graph is first generated, from which the organization of specific phenotype groups and their interactions can be quantified. +Galaxy-ME includes additional tools from **Scimap** and tools from the **Squidpy** package ({% cite Palla2022 %}) that can be used to perform a variety of downstream analyses. For example, once phenotypes have been assigned to individual cells, **Squidpy** has several methods for understanding the spatial organization of the tissue. Using **Squidpy**, a spatial neighborhood graph is first generated, from which the organization of specific phenotype groups and their interactions can be quantified. > Spatial analysis with Squidpy > @@ -371,7 +371,7 @@ Galaxy-ME includes additional tools from **Scimap** and tools from the **Squidpy > > > Neighborhood enrichment plot > > -> > **Squidpy** was used to calculate neighborhood enrichments for each phenotype in core 2 of exemplar 2 (Figure 10.).
This shows which phenotypes co-locate most frequently within the tissue. +> > **Squidpy** was used to calculate neighborhood enrichments for each phenotype in core 2 of exemplar 2 (Figure 10.). This shows which phenotypes co-locate most frequently within the tissue. > > > > ![Heatmap showing phenotype vs neighbourhood enrichment. Most of the heatmap is blue/green (low) but one cell under epithelial is bright yellow (high)](../../images/multiplex-tissue-imaging-TMA/ex2_squidpy_enrichment.png "The output of Squidpy's neighborhood enrichment on core 2 from Exemplar-002.") > > @@ -387,9 +387,9 @@ Galaxy-ME includes additional tools from **Scimap** and tools from the **Squidpy > - In *"Plotting Options"*: > - *"Ripley's statistic to be plotted"*: `L` > -> > Ripley's L plot +> > Ripley's L plot > > -> > **Squidpy** was used to calculate Ripley's L curves for each phenotype in core 2 of exemplar 2 (Figure 11.). This shows the overall organization of each phenotype in the tissue. If the curve for a given phenotype lies above the light grey null line (Example: Epithelial cells in Figure 11.), the phenotype is statistically significantly clustered. If the curve lies on the null line (Example: Myeloid lineage in Figure 11.), it's spatial distribution within the tissue is random. If the curve is underneath the null line (Example: T cells in Figure 11.), it's spatial distribution is statistically significantly dispersed. +> > **Squidpy** was used to calculate Ripley's L curves for each phenotype in core 2 of exemplar 2 (Figure 11.). This shows the overall organization of each phenotype in the tissue. If the curve for a given phenotype lies above the light grey null line (Example: Epithelial cells in Figure 11.), the phenotype is statistically significantly clustered. If the curve lies on the null line (Example: Myeloid lineage in Figure 11.), it's spatial distribution within the tissue is random. 
If the curve is underneath the null line (Example: T cells in Figure 11.), it's spatial distribution is statistically significantly dispersed. > > > > ![Graph of Ripley's L. Value is plotted against bins, all of which show cursves starting at 0 and increasing as bins increase. Epithelial is the highest curve.](../../images/multiplex-tissue-imaging-TMA/ex2_squidpy_ripleys.png "The output of Squidpy's Ripley's L curve on core 2 from Exemplar-002.") > > diff --git a/topics/imaging/tutorials/object-tracking-using-cell-profiler/tutorial.md b/topics/imaging/tutorials/object-tracking-using-cell-profiler/tutorial.md index 842b5a4d12d0b2..fdc646ce47bfd7 100644 --- a/topics/imaging/tutorials/object-tracking-using-cell-profiler/tutorial.md +++ b/topics/imaging/tutorials/object-tracking-using-cell-profiler/tutorial.md @@ -17,11 +17,12 @@ objectives: time_estimation: 1H key_points: - CellProfiler in Galaxy can be used to track objects in time-lapse microscopy images -contributors: -- sunyi000 -- beatrizserrano -- jkh1 +contributions: + authorship: + - sunyi000 + - beatrizserrano + - jkh1 --- diff --git a/topics/imaging/tutorials/tutorial-CP/tutorial.md b/topics/imaging/tutorials/tutorial-CP/tutorial.md index ea48402b0af5f6..da796d919591b6 100644 --- a/topics/imaging/tutorials/tutorial-CP/tutorial.md +++ b/topics/imaging/tutorials/tutorial-CP/tutorial.md @@ -24,9 +24,12 @@ key_points: time points, z-stack positions and crop the image in different ways. - CellProfiler in Galaxy can segment and extract features of any object of interest. - The features and masks can be exported for further analysis. 
-contributors: -- beatrizserrano -- jkh1 + +contributions: + authorship: + - beatrizserrano + - jkh1 + zenodo_link: '' --- diff --git a/topics/introduction/tutorials/galaxy-intro-101-everyone/tutorial.md b/topics/introduction/tutorials/galaxy-intro-101-everyone/tutorial.md index 8ae72e5c797f64..dc8e8c7d90177d 100644 --- a/topics/introduction/tutorials/galaxy-intro-101-everyone/tutorial.md +++ b/topics/introduction/tutorials/galaxy-intro-101-everyone/tutorial.md @@ -23,14 +23,18 @@ key_points: - "Galaxy provides ways to share your results and methods with others" subtopic: core priority: 2 -contributors: - - annefou - - nagoue - - chrisbarnettster - - michelemaroni89 - - olanag1 - - tnabtaf - - shiltemann + +contributions: + authorship: + - annefou + - nagoue + - chrisbarnettster + - michelemaroni89 + - olanag1 + - tnabtaf + - shiltemann + funding: + - elixir-europe recordings: - captioners: diff --git a/topics/introduction/tutorials/galaxy-intro-ngs-data-managment/tutorial.md b/topics/introduction/tutorials/galaxy-intro-ngs-data-managment/tutorial.md index dead7b0fb87f1a..40cbaef93e6a4e 100644 --- a/topics/introduction/tutorials/galaxy-intro-ngs-data-managment/tutorial.md +++ b/topics/introduction/tutorials/galaxy-intro-ngs-data-managment/tutorial.md @@ -19,11 +19,13 @@ key_points: - "One can retrieve NGS data from Sequence Read Archive" - "Galaxy can analyze massive amounts of data and make them suitable for secondary analysis" subtopic: next-steps -contributors: - - nekrut - - mvdbeek - - tnabtaf - - blankenberg + +contributions: + authorship: + - nekrut + - mvdbeek + - tnabtaf + - blankenberg recordings: - captioners: diff --git a/topics/introduction/tutorials/galaxy-intro-peaks2genes/tutorial.md b/topics/introduction/tutorials/galaxy-intro-peaks2genes/tutorial.md index 1203b608e6b86a..e538af9eac945f 100644 --- a/topics/introduction/tutorials/galaxy-intro-peaks2genes/tutorial.md +++ b/topics/introduction/tutorials/galaxy-intro-peaks2genes/tutorial.md @@ -34,6 
+34,10 @@ contributions: - hexylena editing: - teresa-m + funding: + - elixir-europe + - deNBI + - uni-freiburg --- diff --git a/topics/introduction/tutorials/galaxy-reproduce/tutorial.md b/topics/introduction/tutorials/galaxy-reproduce/tutorial.md index db807ac4688e2c..a9cd0ebcfb42be 100644 --- a/topics/introduction/tutorials/galaxy-reproduce/tutorial.md +++ b/topics/introduction/tutorials/galaxy-reproduce/tutorial.md @@ -17,9 +17,12 @@ key_points: - "Galaxy provides ways to inspect and re-use Galaxy histories" - "Galaxy provides an easy way to re-run tasks from histories" subtopic: next-steps -contributors: - - foellmelanie - - annefou +contributions: + authorship: + - foellmelanie + - annefou + funding: + - uni-freiburg --- This training will demonstrate how to reproduce analyses performed in the Galaxy framework. Before we start with the hands-on part, we would like to give you some information about Galaxy. diff --git a/topics/metabolomics/tutorials/lcms-dataprocessing/tutorial.md b/topics/metabolomics/tutorials/lcms-dataprocessing/tutorial.md index 9eea73324e5de5..b15ee612543c2d 100644 --- a/topics/metabolomics/tutorials/lcms-dataprocessing/tutorial.md +++ b/topics/metabolomics/tutorials/lcms-dataprocessing/tutorial.md @@ -16,10 +16,14 @@ key_points: - Data processing is a key step in untargeted Metabolomics analyses. The question of data filtering and correction must be addressed in all projects, even thought in some cases it may lead to the decision of no action on data. In particular, blank filtering, pool variation study and signal drift correction are common aspects to consider when dealing with LC-MS. - Although some main steps are standard, various ways to combine tools exist. Remember that depending on your context (type of samples, protocol specificities...) specific filters/normalisations may be needed, independently of standards ones. 
- A variety of tools is available in Galaxy, but do not forget that you need appropriate knowledge to decide what to use depending on your data. -contributors: -- melpetera -- workflow4metabolomics +contributions: + authorship: + - melpetera + - workflow4metabolomics + editing: [] + funding: + - elixir-europe --- @@ -28,9 +32,9 @@ Metabolomics is a *-omic* science known for being one of the most closely relate It involves the study of different types of matrices, such as blood, urine, tissues, in various organisms including plants. It focuses on studying the very small molecules which are called *metabolites*, to better understand matters linked to the metabolism. -Metabolomics analyses can be quite complex to conduct, especially when dealing with untargeted approaches. -**Liquid-Chromatography Mass Spectrometry** (LC-MS) is one of the three main technologies used to perform this kind of approach. -Data analysis for this technology requires a large variety of steps, ranging from extracting information from the raw data to statistical analysis and annotation. +Metabolomics analyses can be quite complex to conduct, especially when dealing with untargeted approaches. +**Liquid-Chromatography Mass Spectrometry** (LC-MS) is one of the three main technologies used to perform this kind of approach. +Data analysis for this technology requires a large variety of steps, ranging from extracting information from the raw data to statistical analysis and annotation. One of these steps is called "data processing". It takes place after the pre-processing step (extraction of the peak list from raw data) and before any statistical analysis. You can get an overview of a complete LC-MS untargeted metabolomic workflow by following [the dedicated training material]({% link topics/metabolomics/tutorials/lcms/tutorial.md %}). @@ -49,7 +53,7 @@ What should you do to ensure the quality of your tables?
This tutorial will show # Overview and data upload -Data processing covers a large range of actions. They can generally be described as transformation (*e.g.* normalisation) or filter (*e.g.* removal of unwanted ions). +Data processing covers a large range of actions. They can generally be described as transformation (*e.g.* normalisation) or filter (*e.g.* removal of unwanted ions). In this tutorial we will focus on three main types of processing: - Removing "trash" signals - Correcting intensities @@ -62,7 +66,7 @@ The different Galaxy tools that will be used in this tutorial are given in the f ![The picture is composed of 3 main boxes linked by arrows from left to right. In each of the first and last boxes, labelled '3 tabulars', are found 3 files named sampleMetadata, variableMetadata and dataMatrix. In the middle box labelled 'Quality Control' is found the workflow used in this tutorial, represented as tiny boxes with tool names, linked with arrows following the tutorial order.](../../images/tutorial-lcms-proc-wf.png "The full tutorial workflow") All these modules are part of the [Wokflow4Metabolomics](http://workflow4metabolomics.org/) tool suit ({% cite Giacomoni2014 %}, {% cite Guitton2017 %}). -They are compatible with the whole data analysis solution maintained by the W4M team. +They are compatible with the whole data analysis solution maintained by the W4M team. > Workflow4Metabolomics public history > @@ -76,42 +80,42 @@ They are compatible with the whole data analysis solution maintained by the W4M ## Dataset description To illustrate the steps in this tutorial, a dataset has been built purposely. -It is composed of 3 files. They are text files of tables with tabulation as separator. +It is composed of 3 files. They are text files of tables with tabulation as separator. The *dataMatrix* file is a table containing the intensities of measured variables (ions, in lines) for every samples (in column). 
The first column is for ions' identifiers while the first line is for samples' identifiers. -The *variableMetadata* file is a table containing information about the ions. -The first column is for ions' identifiers while the other columns gather information about m/z and retention time (rt). +The *variableMetadata* file is a table containing information about the ions. +The first column is for ions' identifiers while the other columns gather information about m/z and retention time (rt). -The *sampleMetadata* file is a table containing information about the samples. +The *sampleMetadata* file is a table containing information about the samples. The first column is for samples' identifiers while the other columns gather analytical and biological information -such as the order of injection in the analytical sequence and the biological groups of interest for the supposed study. +such as the order of injection in the analytical sequence and the biological groups of interest for the supposed study. The simulated design is composed of 30 biological samples (tagged "sample" in the *sampleType* column), completed with 8 quality-control pooled samples (tagged "pool" in the *sampleType* column) -and 6 extraction solvent samples (tagged "blank" in the *sampleType* column). +and 6 extraction solvent samples (tagged "blank" in the *sampleType* column). The samples have been supposedly injected in two distinct sequences (tagged "B1" and "B2" in the *batch* column), -the injection order being given in the *injectionOrder* column. +the injection order being given in the *injectionOrder* column. Two sample characteristics are given: - The *Group* column represents two groups "A" and "B", supposedly two biological groups (*e.g.* phenotypes, treatment groups...). -- The *Osmo* column represents a measurement of supposed osmolarity, imagining that the samples may be urine samples. 
+- The *Osmo* column represents a measurement of supposed osmolarity, imagining that the samples may be urine samples. ## Data upload -To perform the different exercices of this tutorial, you need to create a new history and upload the dedicated dataset. +To perform the different exercises of this tutorial, you need to create a new history and upload the dedicated dataset. > Data upload > > 1. Create a new history for this tutorial -> 2. Import the 3 starting files in your history. Two possibilities: +> 2. Import the 3 starting files in your history. Two possibilities: > - Option 1: from a shared data library (ask your instructor) > - Option 2: from [Zenodo](https://zenodo.org/record/5179809) using the URLs given below: > > ``` > https://zenodo.org/record/5179809/files/Dataprocessing_dataMatrix.txt > https://zenodo.org/record/5179809/files/Dataprocessing_sampleMetadata.txt -> https://zenodo.org/record/5179809/files/Dataprocessing_variableMetadata.txt +> https://zenodo.org/record/5179809/files/Dataprocessing_variableMetadata.txt > ``` > > {% snippet faqs/galaxy/datasets_import_via_link.md %} @@ -120,7 +124,7 @@ To perform the different exercices of this tutorial, you need to create a new hi > > {% snippet faqs/galaxy/datasets_rename.md %} > -> 4. Check that the datatype is "tabular". If not, you may change it. +> 4. Check that the datatype is "tabular". If not, you may change it. > > {% snippet faqs/galaxy/datasets_change_datatype.md datatype="datatypes" %} > @@ -134,17 +138,17 @@ To perform the different exercices of this tutorial, you need to create a new hi # Removing "trash" signals -Data are often affected by various sources of unwanted variability. This can be found in your tables in different ways, -two common ones being the presence of unwanted ions, and the effect of biological or analytical variables on intensity measures.
-This unwanted information can limit the effectiveness of statistical methods, leading sometimes to difficulties in revealing investigated effects. -Thus, identifying such variability can help analysing your data at its full potential. -Yet, getting rid of such information may not be a trivial problem. It requires different elements to be completed successfully. +Data are often affected by various sources of unwanted variability. This can be found in your tables in different ways, +two common ones being the presence of unwanted ions, and the effect of biological or analytical variables on intensity measures. +This unwanted information can limit the effectiveness of statistical methods, leading sometimes to difficulties in revealing investigated effects. +Thus, identifying such variability can help analysing your data at its full potential. +Yet, getting rid of such information may not be a trivial problem. It requires different elements to be completed successfully. -In this section we will adress the question of "trash" signal filtering. +In this section we will address the question of "trash" signal filtering. By this we mean ions that are present in the extracted peak list we have, but that do not correspond to relevant compounds to analyse. -This can be for example noise, or ions from compounds that are not present in the original biological samples. +This can be for example noise, or ions from compounds that are not present in the original biological samples. -To make it clearer, we will illustrate this with two examples: a "simple" one first and a more advanced one in a second time. +To make it clearer, we will illustrate this with two examples: a "simple" one first and then a more advanced one.
## Filtering signals at given retention times @@ -152,9 +156,9 @@ When using a chromatography column for MS analysis, you may want to exclude some For example, you may want to exclude the dead volume, a calibration zone at the begining or the end, or to exclude a column flush. In this tutorial, let's suppose the data are from some LC-QTOF analysis with a dead volume between 0 and 0.4 minutes and a column flush from 16 minutes. -Then we may want to exclude ions that may be found at theses specific retention time (rt) ranges. +Then we may want to exclude ions that may be found at these specific retention time (rt) ranges. A quick check at the variableMetadata file reveals that a retention time column is available ("rt") with values in minutes. -We can then use this column to filter the dataset. +We can then use this column to filter the dataset. > Using Generic_filter to filter ions found at specific retention times > @@ -181,9 +185,9 @@ We can then use this column to filter the dataset. > {: .hands_on} -The **Generic Filter** {% icon tool %} tool generates 3 tables. They correspond to the 3 original tables, -except the content has been filtered according to the specified parameters. -By "filtering", it means removing from the dataset some variables (ions) and/or samples according to the defined filters. +The **Generic Filter** {% icon tool %} tool generates 3 tables. They correspond to the 3 original tables, +except the content has been filtered according to the specified parameters. +By "filtering", we mean removing from the dataset some variables (ions) and/or samples according to the defined filters. > > @@ -201,7 +205,7 @@ By "filtering", it means removing from the dataset some variables (ions) and/or {: .question} This was a relatively easy task to do. The key point to perform the filter was to know the ranges of rt that needed to be filter, -since the information about rt values was already explicitly found in the dataset.
+since the information about rt values was already explicitly found in the dataset. However, sometimes filtering requires more steps to be able to perform the wanted processing. This will be illustrated with the next example. @@ -210,25 +214,25 @@ This will be illustrated with the next example. As mentioned before, measured signals using mass spectrometry may not always be all of interest. Some can be noise, or compounds not characteristic of the analysed biological samples. There are several ways to reduce there impact on gathered data. -But one key point is always, as a starting point, to identify the issue. +But one key point is always, as a starting point, to identify the issue. In the previous example we knew there were retention time ranges of values where signals were not relevant. -But they may also be some signals that represent noise, found at no specific retention times. -So a question can be "how do we identify these signals?". +But they may also be some signals that represent noise, found at no specific retention times. +So a question can be "how do we identify these signals?". -One possible procedure is the use of blanks to estimate the noise, as a reference. +One possible procedure is the use of blanks to estimate the noise, as a reference. The idea is to compare blanks’ intensities with other samples’ intensities. If there is no subtantial difference, we can assume that the concerned signal is noise. Of course, to be able to do so, you need to inject reference blanks along with your biological samples to get these intensities in your dataMatrix. -Thus, you need to anticipate it when you define your injection sequence. -Ideally, the blanks to use are extraction blanks, but you can also use injection solvent depending on your protocols. +Thus, you need to anticipate it when you define your injection sequence. +Ideally, the blanks to use are extraction blanks, but you can also use injection solvent depending on your protocols. 
When blanks are available in your dataset, another thing to consider is "how do I formalise the information needed to be able to filter?". One common way to compare may be to set a minimum difference between means or medians, or to test for significant difference with a statistical test. -In this tutorial, we will choose to calculate a mean fold change between blanks and non-blank samples, and to set a threshold value for filtering. +In this tutorial, we will choose to calculate a mean fold change between blanks and non-blank samples, and to set a threshold value for filtering. -The mean fold change ("fold") for each ion can be calculated using the **Intensity Check** {% icon tool %} tool. +The mean fold change ("fold") for each ion can be calculated using the **Intensity Check** {% icon tool %} tool. > Using Intensity Check to generate the information needed to filter > @@ -252,8 +256,8 @@ The mean fold change ("fold") for each ion can be calculated using the **Intensi {: .hands_on} This module generates two outputs: a pdf file for plots (that is not of interest in our example) -and a table corresponding to the variableMetadata file used as input, completed with new columns depending on the selected parameters. -In our case, it generated a column named *fold_Other_VS_blank* that we will use for filtering. +and a table corresponding to the variableMetadata file used as input, completed with new columns depending on the selected parameters. +In our case, it generated a column named *fold_Other_VS_blank* that we will use for filtering. > > @@ -264,21 +268,21 @@ In our case, it generated a column named *fold_Other_VS_blank* that we will use > > > > > > Since *"Selected class"*=`blank`, the samples are devided in two groups: the blank samples on one hand -> > and all the other samples on the other hand. 
-> > To calculate a mean fold change (*i.e.* a ratio of means) between the two classes, we need to define which mean will be used as numerator -> > and which one will be used as denominator. Since we defined that the selected class should be the denominator, the values we will get are -> > (mean of non-blank samples)/(mean of blank samples). -> > Thus, a value of "4" for a given ion means that the ion has a mean 4-times higher in non-blank samples compared to blank samples. +> > and all the other samples on the other hand. +> > To calculate a mean fold change (*i.e.* a ratio of means) between the two classes, we need to define which mean will be used as numerator +> > and which one will be used as denominator. Since we defined that the selected class should be the denominator, the values we will get are +> > (mean of non-blank samples)/(mean of blank samples). +> > Thus, a value of "4" for a given ion means that the ion has a mean 4-times higher in non-blank samples compared to blank samples. > > > {: .solution} > {: .question} The **Intensity Check** {% icon tool %} tool generates additional information about your dataset, but do not perform any filter. -We can now define a threshold for filtering, and use the **Generic Filter** {% icon tool %} tool again to remove noise signal. +We can now define a threshold for filtering, and use the **Generic Filter** {% icon tool %} tool again to remove noise signal. Here we will use a threshold value of "4". What we want is to remove ions having a fold value lower than 4, meaning that the mean values of biological samples' intensities for theses ions are not sufficiently high compared to blanks to be considered -to be resulting from relevant compounds. +to be resulting from relevant compounds. > Using Generic_filter to filter ions with insuffisant mean contrast with blank samples > @@ -303,7 +307,7 @@ to be resulting from relevant compounds. 
> {: .hands_on} -As for the previous use of **Generic Filter**, we now have a dataset filtered from noise signals. +As for the previous use of **Generic Filter**, we now have a dataset filtered from noise signals. > > @@ -312,22 +316,22 @@ As for the previous use of **Generic Filter**, we now have a dataset filtered fr > > > > > > From a dataset containing originally 200 ions, the two successive filters lead to a dataset containing only 131 ions. -> > The number of samples did not change, so we still have 30 biological samples, 8 QC pools and 6 blanks. +> > The number of samples did not change, so we still have 30 biological samples, 8 QC pools and 6 blanks. > > > {: .solution} > {: .question} -At that point we will not use the blank samples anymore. However, before removing them from the dataset, it is always interesting to -get an overview of the dataset including blanks. +At that point we will not use the blank samples anymore. However, before removing them from the dataset, it is always interesting to +get an overview of the dataset including blanks. ## Filtered dataset overview The idea here is to have a glance at what the dataset looks like at a macro scale. For this tutorial, we consider that at this step what we have in our dataset are ions only resulting from compounds originally present in the biological samples. -Thus, there are some assumptions we can begin to make, that we can try to check using graphical tools. +Thus, there are some assumptions we can begin to make, that we can try to check using graphical tools. -Here, we will use a tool that is called **Quality Metrics** to have an overview of our dataset through the generation of a pdf file containing some plots. +Here, we will use a tool that is called **Quality Metrics** to have an overview of our dataset through the generation of a pdf file containing some plots. 
> Using Quality Metrics to get an overview of the dataset > @@ -352,17 +356,17 @@ This tool generates several outputs (some of them are going to be used in a late On the top left of the picture, we can see the two first components of a Principal Component Analysis (PCA) with the projection of samples on it, colored by sample type. We can see that the main variability in the dataset distinguishes the blank samples (on the left in black) from the other ones (red for pools and green for samples). -This is awaited since blanks are supposed to have very low intensities compared to biological samples. +This is awaited since blanks are supposed to have very low intensities compared to biological samples. This observation is consistant with the top middle plot which represents the sum of intensities for each sample (plotting according to the injection order), -where blank samples have very low values. +where blank samples have very low values. -This kind of plots is a good way to detect samples that may have abnormally low profiles: +This kind of plots is a good way to detect samples that may have abnormally low profiles: if a sample is positioned at the same area as the blanks, it is suspicious and a special attention should be given to the concerned sample. In this tutorial dataset, no atypical sample is observed, so no special attention needs to be paid on specific samples. -Since the blank samples are of no use anymore, we can remove them from the dataset. -Again, this can be done running the **Generic Filter** {% icon tool %} tool, using the *sampleType* column of the sampleMetadata table. +Since the blank samples are of no use anymore, we can remove them from the dataset. +Again, this can be done running the **Generic Filter** {% icon tool %} tool, using the *sampleType* column of the sampleMetadata table. 
> Using Generic_filter to remove blank samples from the dataset > @@ -390,7 +394,7 @@ Again, this can be done running the **Generic Filter** {% icon tool %} tool, usi > {: .hands_on} -This step leads to a dataset containing 131 ions and 38 samples (from which 8 pools). +This step leads to a dataset containing 131 ions and 38 samples (from which 8 pools). You may have noticed that from module to module, output names tend to become longer and longer. To prevent very long and not-so-informative names due to successive use of modules, we highly recommand to regularly rename the outputs. @@ -409,31 +413,31 @@ Now we are ready to investigate another type of data processing: correcting inte # Signal drift and batch effect correction -Here we will illustrate an example of data processing that is not about filtering, but about correcting. +Here we will illustrate an example of data processing that is not about filtering, but about correcting. Indeed, when it comes to performing statistics comparing biological samples, it is crucial for the intensity values used to be reflecting -relevant variability between samples. +relevant variability between samples. In untargeted Metabolomics studies, we manipulate measures that are relative abundancies. Although we have no unit attached to the intensities, we at least assume that, for a given ion, an intensity value for one sample being higher than the one from another sample means that the compound from which the ion is generated is originally found in higher abundance in the biological sample from -the first sample compared to the other. +the first sample compared to the other. This assumption may seem trivial, but truth is it is not when dealing with LC-MS data. -It is known that when injecting successively a large number of samples, the LC-MS system tends to get dirty. This may cause a measure drift. +It is known that when injecting successively a large number of samples, the LC-MS system tends to get dirty. 
This may cause a measure drift. To prevent inability to catch signal anymore, in case of large injection series, the sequence is generally divided into several batches and the source is cleaned between batches. -Unfortunately, these signal drift and batch design can add significant variability in the data. -It makes sample comparison complicated, since the assumption stated previously about abundance in biological samples may not be true anymore. -In case data is impacted by these effects, we need a way to normalise the data to obtain something reliable for statistical analysis. +Unfortunately, these signal drift and batch design can add significant variability in the data. +It makes sample comparison complicated, since the assumption stated previously about abundance in biological samples may not be true anymore. +In case data is impacted by these effects, we need a way to normalise the data to obtain something reliable for statistical analysis. ## [Optional step] Checking batch effect on data -You may have noticed in the **Quality Metrics** output PDF that appart from blanks, samples seemed to be seperated in two groups. +You may have noticed in the **Quality Metrics** output PDF that apart from blanks, samples seemed to be separated into two groups. Truth is it is indeed. However, the best would be to confirm it, in particular by finding out what these groups are linked to. -Here dices are already rolled, and given the fact that in this section we are adressing signal drift and batch effects, -one could suppose that the groups may have something to do with it. +Here the dice are already rolled, and given the fact that in this section we are addressing signal drift and batch effects, +one could suppose that the groups may have something to do with it. -Let's confirm it by performing a PCA, with the specificity to colour the sample projections according to the supposed effect: the batch information. -This can be done using the **Multivariate** {% icon tool %} tool.
+Let's confirm it by performing a PCA, with the specificity to colour the sample projections according to the supposed effect: the batch information. +This can be done using the **Multivariate** {% icon tool %} tool. > Using Multivariate to get a coloured score plot from a PCA > @@ -449,7 +453,7 @@ This can be done using the **Multivariate** {% icon tool %} tool. > > > > > -> > Changing the `0.8` value to `0.4` makes the text size of labels on the score plot smaller. Since the plot box size by default is tiny, this enhances the readability of the plot. +> > Changing the `0.8` value to `0.4` makes the text size of labels on the score plot smaller. Since the plot box size by default is tiny, this enhances the readability of the plot. > {: .comment} > > > Comment to W4M users @@ -459,15 +463,15 @@ This can be done using the **Multivariate** {% icon tool %} tool. > {: .hands_on} -The output of **Multivariate** we will be interested in here is only the PDF file (Multivariate_figure.pdf). +The output of **Multivariate** we will be interested in here is only the PDF file (Multivariate_figure.pdf). Among the four plots displayed in the file, we can see at the bottom left corner that the first component clearly reveals two distinct groups. -These groups perfectly match the batch information we coloured the sample projections with. +These groups perfectly match the batch information we coloured the sample projections with. -This confirms that we indeed have at least a batch effect in the data. -To note, this comment is relevant thanks to the fact that supposedly we randomised the injection sequence. -This is essential to be able to efficiently separate any analytical effect from known biological variables when searching for effects. +This confirms that we indeed have at least a batch effect in the data. +To note, this comment is relevant thanks to the fact that supposedly we randomised the injection sequence. 
+This is essential to be able to efficiently separate any analytical effect from known biological variables when searching for effects. If you look at the sampleMetadata file we originally imported, you can notice that the biological group (*A* and *B*) are equally found in the two batches, -with alternation all through the injection order. +with alternation all through the injection order. ## Performing the correction process @@ -477,9 +481,9 @@ This is even more the case when dealing with batch effects and signal drifts. There is a strategy, described initially by Van Der Kloet in 2009 ({% cite VdK2009 %}), that has made its way to nowadays procedures ({% cite Dunn2011 %}). The idea is to model the signal drift and the batch effect level by using a reference that can be representative for every ions in the dataset. Indeed, signal drift and batch effects can have very different impacts accross ions in a same dataset, so it is crucial to have a reference that is reliable -for each ion independantly. +for each ion independently. -Here the "universal" reference is obtained by using samples that are made by pooling together an extract of every samples in study. +Here the "universal" reference is obtained by using samples that are made by pooling together an extract of every sample in the study. Thus, these Quality-control pooled samples ('pools') contain all the compounds that are originally found in the samples, being a reference for a very wide range of ions. By injecting these pools all through the injection sequence of the study samples, we obtain a reference for which the main variability observed is composed of the analytical effects we want to correct, since biologically they are supposed to be identical.
@@ -492,12 +496,12 @@ the analytical effects we want to correct, since biologically they are supposed > > ![An example plot with the normalisation formula](../../images/lcms_BC_theo.png "How this works") > -> You can see a plot representing 6 sample measures (blue points) in a batch for a given extracted ion. +> You can see a plot representing 6 sample measures (blue points) in a batch for a given extracted ion. > The yellow line represents a model for the signal drift that can be used to normalise the data. -> This line is determined using the pools only (red squares). -> With the given formula, we can correct the signal drift. +> This line is determined using the pools only (red squares). +> With the given formula, we can correct the signal drift. > -> This work has to be done for each batch. +> This work has to be done for each batch. > Thus, if your sequence is divided into several batches, the idea is to obtain something similar to the following picture: > > ![A before/after plot showing an example of intensities before correction, with clear signal drift and batch effects, and after correction, where the effects have been erased thanks to the correction process](../../images/lcms_BC_theo2.png "Before/after picture") @@ -509,7 +513,7 @@ In this tutorial, we have all the information we need to perform the correction: - They are numerous enough in each batch for the regression to be reliable (well, at least we have enough of them to perform a linear modeling). - The dataset contains the mandatory information needed in the sampleMetadata file: the injection order, the batches of analysis and the sample type (pool or sample). -We can then use the **Batch correction** {% icon tool %} tool to perform the correction. +We can then use the **Batch correction** {% icon tool %} tool to perform the correction. 
> Using Batch correction to correct the data from signal drift and batch effet > @@ -545,8 +549,8 @@ another for the all_loess_pool/all_loess_sample (all_loess) ones. The main differences between the two strategies are the following: - one part allows regression type choice and applies the normalisation depending on internal quality metrics -- the other part offers the possibility to apply the correction without pool samples. -For more information, do not hesitate to read the help section provided with the Galaxy tool. +- the other part offers the possibility to apply the correction without pool samples. +For more information, do not hesitate to read the help section provided with the Galaxy tool. In this tutorial example, we used the *linear* option which is part of the 3L strategy. We could not use any of the other options because the other ones would need more pools per batch than what we have here. @@ -567,7 +571,7 @@ plus 3 additional pages at the end that display overall before/after indicators > > > > Compared to the batch effect that was highlighted as the main effect in the data before the correction process, > > we can see that after correction, the batch effect is no longer explaining the first PCA component. -> > This result is awaited since the reduction of the batch effect is one of the aim of the Batch correction step. +> > This result is expected since the reduction of the batch effect is one of the aims of the Batch correction step. > > > {: .solution} > @@ -576,41 +580,41 @@ plus 3 additional pages at the end that display overall before/after indicators Now that we corrected the data from the signal drift and batch effect, we consider that differences in intensity between samples can be attributed mostly to differences in concentration of original compounds. However, we know that what we converted to intensity values in our table is initially signal measurements, thus we suspect that -intensities may be noisy at some point.
+intensities may be noisy at some point. # Filtering signals of insufficient quality Similarly to many measuring devices, values that are generated by LC-MS devices are subject to noise. In the specific case of untargeted Metabolomics, the values are relative and can have, in a single dataset, a very large range of scales. It is difficult to quantify the noise, even so it is crucial to have a minimum confidence when searching for subtile differences between groups of individuals. -Thus, having a way to put appart variables (ions) that may be of insufficient quality is key to limit the risk of wrong conclusion while analysing the data. +Thus, having a way to set apart variables (ions) that may be of insufficient quality is key to limit the risk of wrong conclusions while analysing the data. Beyond the issue of noise, the quality of the variables in one's dataset is a key question. In particular, in untargeted Metabolomics the data has particularities that make the endpoint analyses complicated, such as the disproportion between sample size and variable number, the complex redundancy and the noise issue already mentioned. When quality indicators can be computed before any statistical analysis, it is a real advantage: it can improve the efficiency of later statistical analysis and also help to select good candidates in biomarker approaches. -One common and powerful way to assess measurements quality is the use of pools. +One common and powerful way to assess measurement quality is the use of pools. Indeed, now that the data is corrected for signal drift, we expect to have stable intensities within pools since there is no biological variability among them. -We can evaluate the remaining variability in pools for each ion, and consider it to be noise and/or analytical effects that we may not have managed to remove for specific ions. -This enables the use of quality thresholds to get rid of ions we consider not to be sufficiently reliable regarding intensity values.
+We can evaluate the remaining variability in pools for each ion, and consider it to be noise and/or analytical effects that we may not have managed to remove for specific ions. +This enables the use of quality thresholds to get rid of ions we consider not to be sufficiently reliable regarding intensity values. -In this tutorial, we will illustrate the use of pools by computing coefficients of variation (CV). +In this tutorial, we will illustrate the use of pools by computing coefficients of variation (CV). This metric allows to evaluate the variability accross samples independantly of the unit scale. -This advantage enables the definition of a unique threshold value that can be applied to every ions whatever the scales. +This advantage enables the definition of a single threshold value that can be applied to every ion whatever its scale. CV are computed for each ion in your dataset, so you can filter ions using the chosen threshold. Note: the greater the CV value, the greater the variability. -The first use of CV we will illustrate here is the computation of CVs for the pools only. The idea here is to grasp the pool variability. -It is expected that pools do not vary much since they correspond to the same biological content. -Thus, if your pools vary too much, you can suspect that the corresponding ion is not reliable enough. +The first use of CV we will illustrate here is the computation of CVs for the pools only. The idea here is to grasp the pool variability. +It is expected that pools do not vary much since they correspond to the same biological content. +Thus, if your pools vary too much, you can suspect that the corresponding ion is not reliable enough. The second use of CV we will use here is a comparison between pool CVs and sample CVs. Indeed, we expect sample CVs to be higher than pool CVs since they share the same noise and analytical variability but also carry biological variability.
-The **Quality Metrics** {% icon tool %} tool provides different CV indicators depending on what is in your sample list. -In particular, in the present case-study, it computes pool CVs along with a ratio between pool CVs and sample CVs. -These two indicators are automatically calculated as long as the input datasets provide the needed information +The **Quality Metrics** {% icon tool %} tool provides different CV indicators depending on what is in your sample list. +In particular, in the present case-study, it computes pool CVs along with a ratio between pool CVs and sample CVs. +These two indicators are automatically calculated as long as the input datasets provide the needed information (a *sampleType* column in the sampleMetadata file with the information of *pool* and *sample*). Thus, for these two indicators the tool can be run without specific parameters to set. @@ -628,7 +632,7 @@ Thus, for these two indicators the tool can be run without specific parameters t > > > > You may have noticed that we already used this tool previously in the tutorial. > > Although CV columns were already available at that time, here you need to use this tool again since this time indicators will be computed on intensities -> > corrected from the signal drifts and batch effects. +> > corrected from the signal drifts and batch effects. > > Note that what we are going to use this time is the tabular output, but while you are at it you can always check the pdf file if you want. > {: .comment} > @@ -644,7 +648,7 @@ For now no filter has been applied on the dataset. To remove the ions of insuffi But first, you need to determine the thresholds you want to use. Here we have two indicators we want to use. One is the pool CVs, the other is the ratio between pool CVs and sample CVs. -This means that we will need to set two distinct thresholds, and to add two numerical filters in **Generic Filter** {% icon tool %}. 
+This means that we will need to set two distinct thresholds, and to add two numerical filters in **Generic Filter** {% icon tool %}. > Using Generic Filter to filter the dataset > @@ -673,8 +677,8 @@ This means that we will need to set two distinct thresholds, and to add two nume > > > > > -> > Here, we took the opportunity of this filter to remove the pools from the dataset. -> > Indeed, pools will be no longer used in this tutorial's steps, so we can remove them from the dataset. +> > Here, we took the opportunity of this filter to remove the pools from the dataset. +> > Indeed, pools will be no longer used in this tutorial's steps, so we can remove them from the dataset. > {: .comment} > > @@ -699,7 +703,7 @@ This means that we will need to set two distinct thresholds, and to add two nume > > > > > > 1. The *0.3* value corresponds to the maximum value kept in the dataset ('Interval of values to remove: *upper*') regarding the -> > *pool_CV* column in your *Variable metadata* file. +> > *pool_CV* column in your *Variable metadata* file. > > Thus, ions with pool CV values superior to 0.3 are excluded from the dataset. > > To note, pool CV values are commonly considered as reflecting unstable ions when superior to 0.3. > > 2. The *1.0* value corresponds to the maximum value kept in the dataset ('Interval of values to remove: *upper*') regarding the @@ -711,47 +715,47 @@ This means that we will need to set two distinct thresholds, and to add two nume > {: .question} -Presented threshold values here are common thresholds in Metabolomics studies. +Presented threshold values here are common thresholds in Metabolomics studies. Please note that thresholds should be chosen consistently regarding the LC-MS device caracteristics and the study objectives. -Thus you may need to be more strict in some cases and less strict in others. -However, being less strict than usual should always be appropriately justified. 
+Thus you may need to be more strict in some cases and less strict in others. +However, being less strict than usual should always be appropriately justified. # Study-specific data processing: example normalising data according to a non-analytical effect -The previous steps presented in this tutorial illustrated categories of data processing that need to be consider whatever the study in +The previous steps presented in this tutorial illustrated categories of data processing that need to be considered whatever the study in untargeted LC-MS Metabolomics projects. In some cases the decision may be not to take actions, but in all cases the issue has to be considered to apprehend the reliability of the data for further analyses. Nonetheless, each study is unique and specific issues can arise that would need some additional data processing steps that are very specific to the study. Thus, sticking to a standard data processing workflow may not be sufficient in some cases. -For example, one commonly considered data processing step in biomarker discovery studies is the reduction of redundancy, -which is a complex question that can be addressed using various strategies depending on the study objectives. +For example, one commonly considered data processing step in biomarker discovery studies is the reduction of redundancy, +which is a complex question that can be addressed using various strategies depending on the study objectives. In this section, we will illustrate an example of study-specific data processing that involves a normalisation of the dataset. This will follow the standard *Identify->Formalise->Handle* design that is common to any data processing approach. ## Identifying unwanted variability -First thing is of course to identify an issue in the data that needs to be handled. +First thing is of course to identify an issue in the data that needs to be handled.
In the previous steps, this was the "easy" part since we already had knowledge about what needed to be handled. -We adressed well-known problems, for which solutions may not always be easy, but at least we know there is something. +We addressed well-known problems, for which solutions may not always be easy, but at least we know there is something. -In study-specific data processing, it may not be that simple. +In study-specific data processing, it may not be that simple. Usually, to see something you need to search for it. This means you have hypotheses prior to the data analysis. -However, sometimes unwanted variability is unexpected, making the search process difficult. +However, sometimes unwanted variability is unexpected, making the search process difficult. -Fortunately, whether you have hypotheses or not, there is a common starting point in Metabolomics that can help you see whether an issue seems to need special attention. +Fortunately, whether you have hypotheses or not, there is a common starting point in Metabolomics that can help you see whether an issue seems to need special attention. This is the use of PCA, that we already saw in previous steps to highlight awaited effects. In this tutorial, we constructed the dataset so we already know what is the variability to identify. In a real project it can also be the case, or at least we can have hypotheses about effects we would not be supprised to observe. -In this tutorial example, we considered samples as being urine samples. +In this tutorial example, we considered samples as being urine samples. It is known that urine samples can vary a lot in total solute concentration, potentially limiting the possibility to detect specific biological effects. -Thus, even if it may not be the case, we can wonder whether the variability of concentration may have a huge effect on the metabolomic dataset we have.
+Thus, even if it may not be the case, we can wonder whether the variability of concentration may have a huge effect on the metabolomic dataset we have. This is why, while making a PCA, we may want to plot the first components and see whether the variability displayed seems to match the concentration. -In the sampleMetadata file provided in this tutorial, you can find a column named *Osmo* that is meant to correspond to measures of osmolality, -one way to represent the overall concentration of urine samples. +In the sampleMetadata file provided in this tutorial, you can find a column named *Osmo* that is meant to correspond to measures of osmolality, +one way to represent the overall concentration of urine samples. Let's try to colour the PCA score plots according to that variable. For this, we will use the **Multivariate** {% icon tool %} Galaxy module. > Using Multivariate to visualise the two first components of a PCA @@ -781,12 +785,12 @@ The PCA scores plot representing the projection of samples on the two first comp Taking into account that the PCA was computed on Unit-Variance-scaled data (see the **Multivariate** {% icon tool %} tool's help section for more details), this result confirms the impact of total concentration on our dataset. -Since this effect in data is independant of the question of interest that we suppose we investigate in the study (in the case of this tutorial), -we may want to get rid of this effect to prevent a power reduction in further statistical analysis. +Since this effect in data is independent of the question of interest that we suppose we investigate in the study (in the case of this tutorial), +we may want to get rid of this effect to prevent a power reduction in further statistical analysis. ## Formalising the information -This step is essential to be able to perform any data processing action. +This step is essential to be able to perform any data processing action.
At this step we have already identified some unwanted variability, however to get rid of it we need to formalise the information in a way or another. As an example, in the *Removing "trash" signals* section, we acknowledged the fact that the LC-MS device could detect signals even in samples that are not supposed to contain @@ -795,7 +799,7 @@ The solution we chose then was to use blank samples to represent the uninformati Thus, the blank samples' intensities are the formalisation of the unwanted variability identified. In this section's example, the formalisation is already done: we have the *Osmo* column in our data, that furthermore enabled to identify the unwanted variability. -Thus, we can move to the next step already. However please remember that this part is still mandatory in the process, even if sometimes it is straightforward. +Thus, we can move to the next step already. However please remember that this part is still mandatory in the process, even if sometimes it is straightforward. ## Getting rid of the variability @@ -803,19 +807,19 @@ Now that the unwanted variability is identified and formalised, the next step is Here we have a quantitative effect linked to the *Osmo* variable. There are plenty of ways to handle this kind of effect, and a proper approach would be first to study the impact of this effect on data in details, -and then only to choose which data processing approach to use to efficiently correct this effect. +and then only to choose which data processing approach to use to efficiently correct this effect. -In this tutorial, we assume the *Osmo* effect is a multiplicative one. -This appears to be true in the dataset: the data was built especially for this tutorial so there is no surprise here. +In this tutorial, we assume the *Osmo* effect is a multiplicative one. +This appears to be true in the dataset: the data was built especially for this tutorial so there is no surprise here. 
Then we can use a normalisation approach to handle the question. Please remember that this is a fake dataset used to illustrate the importance and philosophy of data processing for LC-MS untargeted Metabolomics analyses. -For proper handling of urine concentration in one's study, it may require more in-depth procedures depending on the protocole. +For proper handling of urine concentration in one's study, it may require more in-depth procedures depending on the protocol. The **Normalization** {% icon tool %} Galaxy module enables to perform an adequate correction of this effect. The approach of this module is to apply the normalisation operation to each sample to make the data from all samples directly comparable with each other (to take into account variations of the overall concentrations of samples due to biological and technical reasons). -It provides different normalisation strategies, some of them particularly of interest for NMR-based metabolomic datasets. -In our example, what we are interested in is a normalisation based on the *Osmo* variable, which is one of the possibilities provided by the module. +It provides different normalisation strategies, some of them particularly of interest for NMR-based metabolomic datasets. +In our example, what we are interested in is a normalisation based on the *Osmo* variable, which is one of the possibilities provided by the module. > Using Normalization to handle the total concentration effect > @@ -834,18 +838,18 @@ In our example, what we are interested in is a normalisation based on the *Osmo* {: .hands_on} With the parameter choices made here, the module generates two outputs: a log file and a dataMatrix file. -The dataMatrix corresponds to the normalised intensities. This is what will be used in further analyses. +The dataMatrix corresponds to the normalised intensities. This is what will be used in further analyses.
## Checking the resulting data -Now that the normalisation process has been applied, one may want to check the effect on data. +Now that the normalisation process has been applied, one may want to check the effect on data. Has the effect been entirely removed from the data? Can we see a significant reduction of it in the data? Similarly to the checks we could make in the context of signal drift and batch effect correction with before/after plots, -it is wise to check the impact of any normalisation process on the dataset. -This check can be conducted trying to highlight the unwanted effect the same way it was highlighted in the initial step. +it is wise to check the impact of any normalisation process on the dataset. +This check can be conducted trying to highlight the unwanted effect the same way it was highlighted in the initial step. In our tutorial example we observed the *Osmo* effect on the two first components of a PCA. -Thus, we can compute a similar PCA using the normalised intensities and see whether the two first components still cary the unwanted variability. +Thus, we can compute a similar PCA using the normalised intensities and see whether the two first components still carry the unwanted variability. > Using Multivariate to visualise the two first components of a PCA > @@ -868,15 +872,15 @@ Thus, we can compute a similar PCA using the normalised intensities and see whet {: .hands_on} When looking at the Scores plot available in the PDF output, we can see that the previously observed link between the colouring scale -and the components' coordinates is no longer obvious. -We can then conclude that the *Osmo* effect has been notably reduced inside the dataset. +and the components' coordinates is no longer obvious. +We can then conclude that the *Osmo* effect has been notably reduced inside the dataset. It is even possible to see, on the first component, what seems to be a clear seperation of two groups of samples.
-Theses groups do not correspond to an *Osmo* effect: we can see that high and low values of *Osmo* are found in each group with balance. +These groups do not correspond to an *Osmo* effect: we can see that high and low values of *Osmo* are found in each group with balance. We could be curious and wonder whether these groups would reflect a biological state. -In the sampleMetadata we have, there is a *Group* column, supposedly representing two biological groups A and B. -Let's see how theses groups are projected on the final PCA we have. +In the sampleMetadata we have, there is a *Group* column, supposedly representing two biological groups A and B. +Let's see how these groups are projected on the final PCA we have. > Using Multivariate to visualise the two first components of a PCA > @@ -910,17 +914,17 @@ Let's see how theses groups are projected on the final PCA we have. > > > > > > We can see that the groups that seemed to be shown on the first PCA component match perfectly the biological categories available in the *Group* -> > column of the sampleMetadata file. -> > We could say that the normalisation process enabled to highlight a distinct effect of the biological groups. +> > column of the sampleMetadata file. +> > We could say that the normalisation process enabled to highlight a distinct effect of the biological groups. > > Note that it is possible that this effect would be already noticeable without the normalisation process, -> > however the perfect separation on the first component after normalisation is promising to be able to extract -> > clear information about the source of this separation. +> > however the perfect separation on the first component after normalisation is promising to be able to extract +> > clear information about the source of this separation.
> > > {: .solution} > {: .question} -Now that the dataset seems to be processed adequately, with no identified remaining unwanted main effect +Now that the dataset seems to be processed adequately, with no identified remaining unwanted main effect (the principal effect being the groups of interest for the study), we can consider the data processing of this dataset completed. This does not mean that further data processing steps could not be considered depending on the study objectives, but we reached the end of the example used to illustrate the meaning and importance of untargeted LC-MS metabolomic data processing @@ -929,8 +933,8 @@ through this tutorial. # Conclusion -The question of data filtering and correction must be addressed in all projects, even thought in some cases it may lead to the decision of no action on data. -In particular, the removing of "trash" signals, the signal drift and batch effect correction and the filtering of signals of insufficient quality -are common aspects to consider when dealing with LC-MS data. -Remember that depending on your context (type of samples, protocol specificities...) specific filters/normalisations may be needed, independently of standards ones. +The question of data filtering and correction must be addressed in all projects, even though in some cases it may lead to the decision of no action on data. +In particular, the removing of "trash" signals, the signal drift and batch effect correction and the filtering of signals of insufficient quality +are common aspects to consider when dealing with LC-MS data. +Remember that depending on your context (type of samples, protocol specificities...) specific filters/normalisations may be needed, independently of standard ones. Once you complete your customed processing procedure, your tables are ready for biologically-oriented exploration!
diff --git a/topics/metabolomics/tutorials/lcms-preprocessing/tutorial.md b/topics/metabolomics/tutorials/lcms-preprocessing/tutorial.md index 71caf3b879e16f..d6f1f82f86264d 100644 --- a/topics/metabolomics/tutorials/lcms-preprocessing/tutorial.md +++ b/topics/metabolomics/tutorials/lcms-preprocessing/tutorial.md @@ -22,6 +22,15 @@ contributors: - lecorguille - workflow4metabolomics +contributions: + authorship: + - melpetera + - jfrancoismartin + - lecorguille + - workflow4metabolomics + editing: + funding: + - elixir-europe --- @@ -336,9 +345,9 @@ Once your sampleMetadata table is ready, you can proceed to the upload. In this > The class column > > Depending on further choices, the sampleMetadata file can be decisive. -> It can be used to colour some plots, but also for ion selection (see further in the tutorial). +> It can be used to colour some plots, but also for ion selection (see further in the tutorial). > Please note that the information needed for these steps **should be given as the second column of the sampleMetadata file**, -> the first one being the samples' identifiers. +> the first one being the samples' identifiers. {: .warning} diff --git a/topics/metabolomics/tutorials/lcms/tutorial.md b/topics/metabolomics/tutorials/lcms/tutorial.md index b094c8982aaa37..2f53eba9fa7265 100644 --- a/topics/metabolomics/tutorials/lcms/tutorial.md +++ b/topics/metabolomics/tutorials/lcms/tutorial.md @@ -15,12 +15,17 @@ key_points: - To process untargeted LC-MS metabolomic data, you need a large variety of steps and tools. - Although main steps are standard, various ways to combined tools exist, depending on your data. - Resources are available in Galaxy, but do not forget that you need appropriate knowledge to perform a relevant analysis. 
-contributors: -- melpetera -- lecorguille -- jfrancoismartin -- yguitton -- workflow4metabolomics + +contributions: + authorship: + - melpetera + - lecorguille + - jfrancoismartin + - yguitton + - workflow4metabolomics + editing: + funding: + - elixir-europe --- diff --git a/topics/metabolomics/tutorials/msi-analyte-distribution/tutorial.md b/topics/metabolomics/tutorials/msi-analyte-distribution/tutorial.md index f4f9d1bd0d6009..45c7f8a12c39cf 100644 --- a/topics/metabolomics/tutorials/msi-analyte-distribution/tutorial.md +++ b/topics/metabolomics/tutorials/msi-analyte-distribution/tutorial.md @@ -9,9 +9,9 @@ questions: - In which compartments of the chilli are the measured VOCs located? - In which compartment of the chilli is the capsaicin located? objectives: -- Plot average mass spectra and overlaid mass spectra of single spectra. -- Filter MSI data for a specific m/z range. -- Automatic generation of many m/z images. +- Plot average mass spectra and overlaid mass spectra of single spectra. +- Filter MSI data for a specific m/z range. +- Automatic generation of many m/z images. - Overlay the distribution of several m/z in one image. time_estimation: 1H key_points: @@ -19,17 +19,22 @@ key_points: - MSI data can be filtered for m/z ranges of interest to speed up analysis time. - Distribution images for many analytes can be automatically generated with the MSI m/z image tool. - The MSI m/z image tools allow overlaying the distribution images for several analytes. -contributors: -- foellmelanie -- MarenStillger +contributions: + authorship: + - foellmelanie + - MarenStillger + editing: + funding: + - elixir-europe + - uni-freiburg --- -Mass spectrometry imaging (MSI) is applied to measure the spatial distribution of hundreds of biomolecules in a sample. A mass spectrometer scans over the entire sample and collects a mass spectrum every 5-200 µm. This results in thousands of spots (or pixels) for each of which a mass spectrum is acquired. 
Each mass spectrum consists of hundreds of analytes that are measured by their mass-to-charge (m/z) ratio. For each analyte the peak intensity in the mass spectra of every pixel is known and can be set together to map the spatial distribution of the analyte in the sample. +Mass spectrometry imaging (MSI) is applied to measure the spatial distribution of hundreds of biomolecules in a sample. A mass spectrometer scans over the entire sample and collects a mass spectrum every 5-200 µm. This results in thousands of spots (or pixels) for each of which a mass spectrum is acquired. Each mass spectrum consists of hundreds of analytes that are measured by their mass-to-charge (m/z) ratio. For each analyte the peak intensity in the mass spectra of every pixel is known and can be set together to map the spatial distribution of the analyte in the sample. -The technique has a broad range of applications as it is able to measure many different kinds of analytes such as peptides, proteins, metabolites or chemical compounds in a large variety of samples such as cells, tissues and liquid biopsies. Application areas include pharmacokinetic studies, biomarker discovery, molecular pathology, forensic studies, plant research and material sciences. The strength of MSI is the simultaneous analysis of hundreds of analytes in an unbiased, untargeted, label-free, fast and affordable measurement while maintaining morphological information. +The technique has a broad range of applications as it is able to measure many different kinds of analytes such as peptides, proteins, metabolites or chemical compounds in a large variety of samples such as cells, tissues and liquid biopsies. Application areas include pharmacokinetic studies, biomarker discovery, molecular pathology, forensic studies, plant research and material sciences. 
The strength of MSI is the simultaneous analysis of hundreds of analytes in an unbiased, untargeted, label-free, fast and affordable measurement while maintaining morphological information. Depending on the analyte of interest and the application, different mass spectrometers are used. A mass spectrometer measures the analytes by ionizing, evaporating and sorting them by their mass-to-charge (m/z) ratio. Put simply, a mass spectrometer consists basically of three parts: an ionization source, a mass analyzer and a detector. The most common ionization sources for MSI are MALDI (Matrix Assisted Laser Desorption/Ionization), DESI (Desorption Electrospray Ionization) and SIMS (Secondary Ion Mass Spectrometry). @@ -37,9 +42,9 @@ Depending on the analyte of interest and the application, different mass spectro One common type of mass spectrometer for MSI is a MALDI Time-Of-Flight (MALDI-TOF) device. During MALDI ionization, a laser is fired onto the sample, which has been covered with a special matrix that absorbs the laser energy and transfers it to the analytes. This process vaporizes and ionizes the analytes. As they are now charged, they can be accelerated in an electric field towards the TOF tube. The time of flight through the tube to the detector is measured, which allows calculation of the mass over charge (m/z) of the analyte, as both mass and charge are correlated with time of flight. During measurement, complete mass spectra with hundreds of m/z - intensity pairs are acquired in thousands of sample plots, leading to large and complex datasets. Each mass spectrum is annotated with coordinates (x,y) that define its location in the sample. This allows visualization of the intensity distribution of each m/z feature in the sample as a heatmap. -Depending on the analyte of interest, the sample type and the mass spectrometer, the sample preparation steps as well as the properties of the acquired data vary. 
Apart from these differences, the preparation and measurement of the sample is normally straightforward, while the analysis of the large and complex dataset is the main bottleneck of a MSI experiment. +Depending on the analyte of interest, the sample type and the mass spectrometer, the sample preparation steps as well as the properties of the acquired data vary. Apart from these differences, the preparation and measurement of the sample is normally straightforward, while the analysis of the large and complex dataset is the main bottleneck of a MSI experiment. -In this tutorial we will determine analytes with a localized distribution in a chilli cross section. The data analysis steps can be transferred to any other application in which the morphological distribution of an analyte in a sample is analyzed. A common application is studying the distribution of a drug and its metabolites in an animal model to better understand pharmacokinetic properties of the drug. +In this tutorial we will determine analytes with a localized distribution in a chilli cross section. The data analysis steps can be transferred to any other application in which the morphological distribution of an analyte in a sample is analyzed. A common application is studying the distribution of a drug and its metabolites in an animal model to better understand pharmacokinetic properties of the drug. > @@ -53,11 +58,11 @@ In this tutorial we will determine analytes with a localized distribution in a c # Chilli dataset -In this tutorial we will use the chilli dataset generated in the lab of Robert Winkler at the CINVESTAV institute in Irapuato (Mexico) and deposited at [Zenodo](https://zenodo.org/record/484496). 
The data was acquired to demonstrate their self-made low-cost 'plug and play' MSI system as well as their open source R scripts and to show they allow the direct spatially resolved detection of volatile organic compounds (VOCs) from plant tissue ({% cite Maldonado_Torres_2014 %} and {% cite Gamboa_Becerra_2015 %}). +In this tutorial we will use the chilli dataset generated in the lab of Robert Winkler at the CINVESTAV institute in Irapuato (Mexico) and deposited at [Zenodo](https://zenodo.org/record/484496). The data was acquired to demonstrate their self-made low-cost 'plug and play' MSI system as well as their open source R scripts and to show they allow the direct spatially resolved detection of volatile organic compounds (VOCs) from plant tissue ({% cite Maldonado_Torres_2014 %} and {% cite Gamboa_Becerra_2015 %}). -The mass spectrometer consisted of a low-temperature plasma (LTP) ionization source coupled to a quadrupole mass analyzer. This ionization source allows measurement of semi-volatile organic compunds directly from fresh material under ambient conditions without destruction of the sample. To get an idea how this set up and the chilli looks, have a look at their [video](https://www.youtube.com/watch?v=NbZ1QqTZvXM) and [website](http://lababi.bioprocess.org/). The chilli (*Capsicum annuum*) was purchased from a local market in Irapuato, Mexico and longitudinally cut into a slice of 80 x 35 mm and 4 mm thickness and laid onto a glass slide. Mass spectra were acquired with a spatial resolution of 1 mm and in a mass range from 15 – 2000 m/z to measure semi-volatile and volatile organic compounds (VOCs). +The mass spectrometer consisted of a low-temperature plasma (LTP) ionization source coupled to a quadrupole mass analyzer. This ionization source allows measurement of semi-volatile organic compounds directly from fresh material under ambient conditions without destruction of the sample.
To get an idea how this set up and the chilli looks, have a look at their [video](https://www.youtube.com/watch?v=NbZ1QqTZvXM) and [website](http://lababi.bioprocess.org/). The chilli (*Capsicum annuum*) was purchased from a local market in Irapuato, Mexico and longitudinally cut into a slice of 80 x 35 mm and 4 mm thickness and laid onto a glass slide. Mass spectra were acquired with a spatial resolution of 1 mm and in a mass range from 15 – 2000 m/z to measure semi-volatile and volatile organic compounds (VOCs). -We will check the properties of the dataset and then perform a similar analysis to that described by {% cite Gamboa_Becerra_2015 %}: plotting an average mass spectrum, filtering the dataset for the relevant m/z range, automatical generation of m/z images to find analytes that correspond to morphological features and generating an image that overlays three different analytes. +We will check the properties of the dataset and then perform a similar analysis to that described by {% cite Gamboa_Becerra_2015 %}: plotting an average mass spectrum, filtering the dataset for the relevant m/z range, automatical generation of m/z images to find analytes that correspond to morphological features and generating an image that overlays three different analytes. ![Chilli](../../images/msi_distribution_chilli.png "Cross section of a chilli in a similar orientation as in the publication by Gamboa-Becerra") @@ -131,7 +136,7 @@ Before starting any analysis it is important to check the characteristics and qu > - Press **Run Tool** > > > Properties of the imzML file -> > To set the parameters for the mass spectrometry imaging tools correctly the following three parameters should be known about the dataset: 1) is the imzML file type processed or continuous, 2) are the spectra in profile or centroided mode and 3) the accuracy of the mass spectrometer. For the chilli dataset the publication states that it is a processed imzML type in centroided mode. 
This information can also be extracted by opening the local imzML component of the file in a text editor or web browser. Line 10 and 11 state: +> > To set the parameters for the mass spectrometry imaging tools correctly the following three parameters should be known about the dataset: 1) is the imzML file type processed or continuous, 2) are the spectra in profile or centroided mode and 3) the accuracy of the mass spectrometer. For the chilli dataset the publication states that it is a processed imzML type in centroided mode. This information can also be extracted by opening the local imzML component of the file in a text editor or web browser. Line 10 and 11 state: > > > > > > @@ -150,9 +155,9 @@ Before starting any analysis it is important to check the characteristics and qu > > > > > > -> > This means that the file consists of 4166 spectra (count="4166") and that the imzML type is 'processed' as the first spectrum (scan="1") has 11127 m/z values (defaultArrayLength="11127) while the following spectra (not shown here) have different numbers of m/z values. In processed imzML files each spectrum has an individual m/z axis, in contrast to the continuous imzML type, where all spectra have the same m/z axis. In many software tools processed imzML files are not or only partly supported. The MSI tools in Galaxy are based on [Cardinal](http://cardinalmsi.org/) and therefore support imzML files, but require a binning of the m/z values while reading the file. The bin size should be chosen according to the m/z accuracy of the mass spectrometer. For the chilli dataset we therefore use 0.1 m/z, as this results in m/z bins of 0.2 m/z which corresponds to the m/z step size that was used in the publication. +> > This means that the file consists of 4166 spectra (count="4166") and that the imzML type is 'processed' as the first spectrum (scan="1") has 11127 m/z values (defaultArrayLength="11127) while the following spectra (not shown here) have different numbers of m/z values. 
In processed imzML files each spectrum has an individual m/z axis, in contrast to the continuous imzML type, where all spectra have the same m/z axis. In many software tools processed imzML files are not or only partly supported. The MSI tools in Galaxy are based on [Cardinal](http://cardinalmsi.org/) and therefore support imzML files, but require a binning of the m/z values while reading the file. The bin size should be chosen according to the m/z accuracy of the mass spectrometer. For the chilli dataset we therefore use 0.1 m/z, as this results in m/z bins of 0.2 m/z which corresponds to the m/z step size that was used in the publication. > > -> > Regarding the spectra, line 10 states that the spectra are in profile mode while the 'run' section defines spectrum 1 as a centroided spectrum. As the 'run' section is directly linked to the spectra in the ibd file, this is the information to trust. Furthermore, the 'dataProcessingList' element gives a hint that peak picking was performed, which is confirmed in the publication. +> > Regarding the spectra, line 10 states that the spectra are in profile mode while the 'run' section defines spectrum 1 as a centroided spectrum. As the 'run' section is directly linked to the spectra in the ibd file, this is the information to trust. Furthermore, the 'dataProcessingList' element gives a hint that peak picking was performed, which is confirmed in the publication. > {: .comment} > {: .hands_on} @@ -168,26 +173,26 @@ Before starting any analysis it is important to check the characteristics and qu > > > > 1. The measured m/z range was m/z 15 - 2000. > > 2. 4166 spectra were measured. -> > 3. The rough shape of the chilli section is visible in the "Number of peaks per spectrum"; "Total Ion Current" and "PCA result" images. +> > 3. The rough shape of the chilli section is visible in the "Number of peaks per spectrum"; "Total Ion Current" and "PCA result" images. 
> > > {: .solution} > {: .question} -Open the quality report with the eye button and check the summary table on the first page to answer question 1 and 2. The shape of the chilli can roughly be seen in the images showing the number of peaks per spectrum and the total ion current, with higher values in the middle part of the fruit that roughly corresponds to the placenta. In the PCA image the complete chilli, except for the lowest part of the fruit, is visible in light colours compared to the dark background. +Open the quality report with the eye button and check the summary table on the first page to answer questions 1 and 2. The shape of the chilli can roughly be seen in the images showing the number of peaks per spectrum and the total ion current, with higher values in the middle part of the fruit that roughly corresponds to the placenta. In the PCA image the complete chilli, except for the lowest part of the fruit, is visible in light colours compared to the dark background. -To further investigate mass spectra of different chilli compartments we select one pixel for each tissue area (pericarp, placenta and seeds) from the total ion current image. Knowing the exact shape of the chilli tissue from Figure 4 of the publication helps to find the corresponding areas. This are the pixels we have chosen for the tutorial: +To further investigate mass spectra of different chilli compartments we select one pixel for each tissue area (pericarp, placenta and seeds) from the total ion current image. Knowing the exact shape of the chilli tissue from Figure 4 of the publication helps to find the corresponding areas. These are the pixels we have chosen for the tutorial: seeds: x=39 y=53, placenta: x=50 y=44, pericarp: x=25 y=60 -The relevant m/z range for VOCs was not known before the measurement and therefore chosen quite liberally. 
The average mass spectra plots give a hint about the relevant m/z range of the sample with most peaks below 750 m/z and no distinct peaks above 750 m/z. +The relevant m/z range for VOCs was not known before the measurement and therefore chosen quite liberally. The average mass spectra plots give a hint about the relevant m/z range of the sample with most peaks below 750 m/z and no distinct peaks above 750 m/z. ![quality report](../../images/msi_distribution_qc.png "Example plots from the quality report") ## Plotting average mass spectra -We will follow up on the average mass spectra plots from the quality control report as well as on the differences between mass spectra from different chilli compartments. First, we generate more zoomed in mass spectra plots to get an idea about the m/z range that is relevant for VOCs. Next, we will plot and compare mass spectra that derive from different regions of the chilli. +We will follow up on the average mass spectra plots from the quality control report as well as on the differences between mass spectra from different chilli compartments. First, we generate more zoomed in mass spectra plots to get an idea about the m/z range that is relevant for VOCs. Next, we will plot and compare mass spectra that derive from different regions of the chilli. > Average mass spectra > @@ -224,19 +229,19 @@ We will follow up on the average mass spectra plots from the quality control rep > > -> 1. What is the approximate m/z of the heaviest peak visible in the average spectra plot of the complete m/z range? +> 1. What is the approximate m/z of the heaviest peak visible in the average spectra plot of the complete m/z range? > 2. What is the approximate m/z of the peak with the highest average intensity? > > > > > > > 1. There is a tiny peak at m/z ~760. -> > 2. The highest peak is at ~80 m/z, which can best be seen in the mass spectrum between 15 and 200 m/z. +> > 2. 
The highest peak is at ~80 m/z, which can best be seen in the mass spectrum between 15 and 200 m/z. > > > {: .solution} > {: .question} -In the average spectrum of the complete m/z range there are no visible peaks above 800 m/z. Most of the high intensity peaks are below 300 Th, which corresponds to the typical masses of VOCs. Even in the zoomed mass spectra between 1500 and 2000 m/z only a few distinct peaks can be identified in the noise. In accordance with {% cite Gamboa_Becerra_2015 %}, we will restrict further analysis to the m/z range between 15 and 1000. +In the average spectrum of the complete m/z range there are no visible peaks above 800 m/z. Most of the high intensity peaks are below 300 Th, which corresponds to the typical masses of VOCs. Even in the zoomed mass spectra between 1500 and 2000 m/z only a few distinct peaks can be identified in the noise. In accordance with {% cite Gamboa_Becerra_2015 %}, we will restrict further analysis to the m/z range between 15 and 1000. ![Average plots](../../images/msi_distribution_average_spectra.png "Complete and zoomed in average mass spectra") @@ -276,12 +281,12 @@ We will rerun the MSI plot spectra tool and add annotation to the pixels that co > > -> 1. Which compartment does the highest intensity peak belong to? +> 1. Which compartment does the highest intensity peak belong to? > 2. Which compartment does the third-highest intensity peak belong to? > > > > > -> > 1. The highest intensity peak at around 80 m/z belongs to the spectrum that is located in the pericarp of the chilli. +> > 1. The highest intensity peak at around 80 m/z belongs to the spectrum that is located in the pericarp of the chilli. > > 2. The peak at around 60 m/z belongs to the spectrum that is located in the seeds. > {: .solution} > @@ -314,20 +319,20 @@ The single spectra that derive from different chilli compartments show some majo > > > > > -> > 1. After filtering 4926 m/z features are left. +> > 1. 
After filtering 4926 m/z features are left. > > 2. The dataset originally had 9926 m/z features; after filtering 4926 are left, which means that 5000 were removed. > > > {: .solution} > {: .question} -The MSI tools are only able to write outputs as continuous imzML format; therefore, from now on the dataset is in continuous imzML format and "processed imzML file" can be set to 'no' in the following tools. +The MSI tools are only able to write outputs as continuous imzML format; therefore, from now on the dataset is in continuous imzML format and "processed imzML file" can be set to 'no' in the following tools. # Multiple and overlayed analyte images ## Automatic generation of analyte images -The main question for the chilli dataset is which m/z features have a localized distribution in the fruit. This question can be addressed by automatically generating distribution images for all analytes (m/z features) and then visually identifying which features are localized in specific compartments of the fruit. {% cite Gamboa_Becerra_2015 %} scan the m/z range with a step size of 0.2 m/z and a tolerance of 0.4 Th, which resulted in more than 2000 images which they visually explored for localized features. In this training we will only generate images for the two mass ranges where we have already seen high intensity peaks in the spectra plots: around 60 and 80 m/z. +The main question for the chilli dataset is which m/z features have a localized distribution in the fruit. This question can be addressed by automatically generating distribution images for all analytes (m/z features) and then visually identifying which features are localized in specific compartments of the fruit. {% cite Gamboa_Becerra_2015 %} scan the m/z range with a step size of 0.2 m/z and a tolerance of 0.4 Th, which resulted in more than 2000 images which they visually explored for localized features. 
In this training we will only generate images for the two mass ranges where we have already seen high intensity peaks in the spectra plots: around 60 and 80 m/z. This requires three steps. Firstly, all m/z features of the dataset with the MSI data exporter are extracted. Secondly, all m/z features are filtered in the m/z ranges that we are interested in: between 55 and 65 Th, as well as between 75 and 85 m/z (this step can be skipped to obtain distribution images for all features). Thirdly, the MSI m/z image tool is used to automatically generate distribution images for each of the m/z from the second step. As the data is already binned to 0.2 Th, only the m/z tolerance of 0.4 m/z has to be specified. The large tolerance was chosen to obtain images from overlapping m/z ranges to make the signal intensity more robust against small m/z inaccuracies in individual mass spectra. @@ -357,7 +362,7 @@ This requires three steps. Firstly, all m/z features of the dataset with the MSI > > > 1. Which are the two analyte images that show the most localized distribution? -> 2. Which compartments are the two features localized in? +> 2. Which compartments are the two features localized in? > > > > > @@ -381,7 +386,7 @@ After m/z features with a localized distribution have been found, it is interest ## Overlay image for three analyte features -To get an idea about the distribution of capsaicin in the chilli we will plot its distribution in an overlay image with the two unknown but localized features 62.2 m/z and 84.2 m/z. +To get an idea about the distribution of capsaicin in the chilli we will plot its distribution in an overlay image with the two unknown but localized features 62.2 m/z and 84.2 m/z. > Overlay image for several m/z features > @@ -422,11 +427,11 @@ To get an idea about the distribution of capsaicin in the chilli we will plot it > > -> 1. Which compartment is capsaicin located in? +> 1. Which compartment is capsaicin located in? > > > > > -> > 1. 
Capsaicin is located in the middle of the placenta, as well as partly in the seeds. +> > 1. Capsaicin is located in the middle of the placenta, as well as partly in the seeds. > > > {: .solution} > @@ -436,7 +441,7 @@ To get an idea about the distribution of capsaicin in the chilli we will plot it # Conclusion -This tutorial covered the steps to explore the properties of a dataset and visualize the morphological distribution of molecules. +This tutorial covered the steps to explore the properties of a dataset and visualize the morphological distribution of molecules. ![workflow](../../images/msi_distribution_workflow.png "Workflow that was used during this tutorial.") From f74b887817c7390df158503823af8eca93dc278e Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 17:14:03 +0200 Subject: [PATCH 17/41] add shortname --- ORGANISATIONS.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/ORGANISATIONS.yaml b/ORGANISATIONS.yaml index a177399511dfde..7a55f6b3e7d229 100644 --- a/ORGANISATIONS.yaml +++ b/ORGANISATIONS.yaml @@ -76,6 +76,7 @@ elixir-europe: ror: "044rwnt51" elixir-goblet-ttt: + short_name: Goblet name: ELIXIR Goblet Train the Trainers joined: 2022-09 github: false From a13a51ee41d2f89f37b7f1516217c44d38ad53cc Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 17:15:18 +0200 Subject: [PATCH 18/41] link to hall of fame pages in logo soup --- _layouts/home.html | 38 +++++++++----------------------------- 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/_layouts/home.html b/_layouts/home.html index df63956062c66c..70452694caa617 100644 --- a/_layouts/home.html +++ b/_layouts/home.html @@ -304,47 +304,27 @@

Acknowledgment and Funding

- {% for entity in site.data['funders'] %} + {% for entity in site.data['grants'] %}
- {% if entity[1].url %} - - {% else %} - - {% endif %} +
- {% if entity[1].url %} - + {{ entity[1].short_name | default: entity[1].name }} - {% else %} - {{ entity[1].short_name | default: entity[1].name }} - {% endif %}
{% endfor %} {% for entity in site.data['organisations'] %}
- {% if entity[1].url %} - - {% else %} - - {% endif %} +
- {% if entity[1].url %} - + {{ entity[1].short_name | default: entity[1].name }} - {% else %} - {{ entity[1].short_name | default: entity[1].name }} - {% endif %}
{% endfor %} From c73c15d6dab7c5378d4c6e57c4d03070732f7ed8 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 17:15:38 +0200 Subject: [PATCH 19/41] reorder hof page for orgs --- _layouts/contributor_index.html | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/_layouts/contributor_index.html b/_layouts/contributor_index.html index 14dba6728c5f7d..f2e185c12ddad4 100644 --- a/_layouts/contributor_index.html +++ b/_layouts/contributor_index.html @@ -25,6 +25,8 @@

{% if entity.funder%}

The activities listed below were partially funded by this project.

+

{{ entity.funding_statement | markdownify }}

+ {% endif %} @@ -78,12 +80,9 @@

Former Members

Contributions

- {% if entity.funder %} - {{ entity.funding_statement | markdownify }} - {% endif %}

- The following list includes only slides and tutorials where the individual has been added to the contributor list. This may not include the sum total of their contributions to the training materials (e.g. GTN css or design, tutorial datasets, workflow development, etc.) unless described by a news post. + The following list includes only slides and tutorials where the individual or organisation has been added to the contributor list. This may not include the sum total of their contributions to the training materials (e.g. GTN CSS or design, tutorial datasets, workflow development, etc.) unless described by a news post. {% unless entity.github == false %}

From ce8a784bdb4509aaf2100c5465db0634150581f1 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 17:21:28 +0200 Subject: [PATCH 20/41] rename funders to grants --- _plugins/gtn/metrics.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_plugins/gtn/metrics.rb b/_plugins/gtn/metrics.rb index 73e252eb1be78c..64bc679083d7a4 100644 --- a/_plugins/gtn/metrics.rb +++ b/_plugins/gtn/metrics.rb @@ -109,8 +109,8 @@ def self.collect_metrics(site) type: 'counter' }, 'gtn_funders_total' => { - value: segment(site.data['funders'].values.reject { |x| x['halloffame'] == 'no' }, 'orcid'), - help: 'Total number of funders', + value: segment(site.data['grants'].values.reject { |x| x['halloffame'] == 'no' }, 'orcid'), + help: 'Total number of grants', type: 'counter' }, 'gtn_tutorials_total' => { From 6a55c197946a045259dd308132b70cb47a43bb0b Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 17:26:32 +0200 Subject: [PATCH 21/41] rename funders to grants --- _plugins/api.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_plugins/api.rb b/_plugins/api.rb index d94ac92034f8ba..c211813341dd8b 100644 --- a/_plugins/api.rb +++ b/_plugins/api.rb @@ -164,7 +164,7 @@ def generate(site) # Contributors Jekyll.logger.debug '[GTN/API] Contributors, Funders, Organisations' - %w[contributors funders organisations].each do |type| + %w[contributors grants organisations].each do |type| page2 = PageWithoutAFile.new(site, '', 'api/', "#{type}.json") page2.content = JSON.pretty_generate(site.data[type].map { |c, _| mapContributor(site, c) }) page2.data['layout'] = nil From 60caf451aa203d08b05c40c6d3c2e7f70a69f881 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 17:47:21 +0200 Subject: [PATCH 22/41] add empty lists for placeholder contribution keys --- topics/epigenetics/tutorials/ewas-suite/tutorial.md | 4 ++-- topics/epigenetics/tutorials/hicexplorer/tutorial.md | 4 ++-- 
topics/metabolomics/tutorials/lcms-dataprocessing/tutorial.md | 2 +- topics/metabolomics/tutorials/lcms-preprocessing/tutorial.md | 2 +- topics/metabolomics/tutorials/lcms/tutorial.md | 2 +- .../tutorials/msi-analyte-distribution/tutorial.md | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/topics/epigenetics/tutorials/ewas-suite/tutorial.md b/topics/epigenetics/tutorials/ewas-suite/tutorial.md index ca03203c1734dd..387843ccf08b82 100644 --- a/topics/epigenetics/tutorials/ewas-suite/tutorial.md +++ b/topics/epigenetics/tutorials/ewas-suite/tutorial.md @@ -18,8 +18,8 @@ contributions: - kkamieniecka - khaled196 - poterlowicz-lab - editing: - testing: + editing: [] + testing: [] funding: - elixir-europe --- diff --git a/topics/epigenetics/tutorials/hicexplorer/tutorial.md b/topics/epigenetics/tutorials/hicexplorer/tutorial.md index 8d5698671590e7..96e06462b25aab 100644 --- a/topics/epigenetics/tutorials/hicexplorer/tutorial.md +++ b/topics/epigenetics/tutorials/hicexplorer/tutorial.md @@ -18,8 +18,8 @@ contributions: - fidelram - vivekbhr - polkhe - editing: - testing: + editing: [] + testing: [] funding: - elixir-europe - deNBI diff --git a/topics/metabolomics/tutorials/lcms-dataprocessing/tutorial.md b/topics/metabolomics/tutorials/lcms-dataprocessing/tutorial.md index b15ee612543c2d..ac312cf9f55fc0 100644 --- a/topics/metabolomics/tutorials/lcms-dataprocessing/tutorial.md +++ b/topics/metabolomics/tutorials/lcms-dataprocessing/tutorial.md @@ -21,7 +21,7 @@ contributions: authorship: - melpetera - workflow4metabolomics - editing: + editing: [] funding: - elixir-europe --- diff --git a/topics/metabolomics/tutorials/lcms-preprocessing/tutorial.md b/topics/metabolomics/tutorials/lcms-preprocessing/tutorial.md index d6f1f82f86264d..431a8215984e7a 100644 --- a/topics/metabolomics/tutorials/lcms-preprocessing/tutorial.md +++ b/topics/metabolomics/tutorials/lcms-preprocessing/tutorial.md @@ -28,7 +28,7 @@ contributions: - jfrancoismartin - lecorguille - 
workflow4metabolomics - editing: + editing: [] funding: - elixir-europe --- diff --git a/topics/metabolomics/tutorials/lcms/tutorial.md b/topics/metabolomics/tutorials/lcms/tutorial.md index 2f53eba9fa7265..5729e20598d044 100644 --- a/topics/metabolomics/tutorials/lcms/tutorial.md +++ b/topics/metabolomics/tutorials/lcms/tutorial.md @@ -23,7 +23,7 @@ contributions: - jfrancoismartin - yguitton - workflow4metabolomics - editing: + editing: [] funding: - elixir-europe diff --git a/topics/metabolomics/tutorials/msi-analyte-distribution/tutorial.md b/topics/metabolomics/tutorials/msi-analyte-distribution/tutorial.md index 45c7f8a12c39cf..488f98c158ddeb 100644 --- a/topics/metabolomics/tutorials/msi-analyte-distribution/tutorial.md +++ b/topics/metabolomics/tutorials/msi-analyte-distribution/tutorial.md @@ -24,7 +24,7 @@ contributions: authorship: - foellmelanie - MarenStillger - editing: + editing: [] funding: - elixir-europe - uni-freiburg From 49420458001b6fdb83b5972cffcac0096c70ed02 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 17:48:03 +0200 Subject: [PATCH 23/41] add a bit more debugging info --- _plugins/api.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/_plugins/api.rb b/_plugins/api.rb index c211813341dd8b..c6fdb5943eda77 100644 --- a/_plugins/api.rb +++ b/_plugins/api.rb @@ -209,6 +209,7 @@ def generate(site) out = site.data[topic].dup out['materials'] = TopicFilter.topic_filter(site, topic).map do |x| q = x.dup + puts q['url'] q['contributors'] = Gtn::Contributors.get_contributors(q).dup.map do |c| mapContributor(site, c) end From 60c758328bb9eb6ac2a737f19d8ee1f51e0b5a32 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 17:51:22 +0200 Subject: [PATCH 24/41] rephrase --- _layouts/contributor_index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_layouts/contributor_index.html b/_layouts/contributor_index.html index f2e185c12ddad4..0245cb1b591500 100644 --- 
a/_layouts/contributor_index.html +++ b/_layouts/contributor_index.html @@ -24,7 +24,7 @@

{{ entity.name | default: page.contributor }}

{% if entity.funder%} -

The activities listed below were partially funded by this project.

+

The materials and activities listed below were funded (to some degree) by this project.

{{ entity.funding_statement | markdownify }}

{% endif %} From d29f38e8aca158355a5acd31eb94ea85bdab67ad Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 17:57:27 +0200 Subject: [PATCH 25/41] add citations for linter --- topics/epigenetics/tutorials/hicexplorer/tutorial.bib | 0 topics/epigenetics/tutorials/hicexplorer/tutorial.md | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 topics/epigenetics/tutorials/hicexplorer/tutorial.bib diff --git a/topics/epigenetics/tutorials/hicexplorer/tutorial.bib b/topics/epigenetics/tutorials/hicexplorer/tutorial.bib new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/topics/epigenetics/tutorials/hicexplorer/tutorial.md b/topics/epigenetics/tutorials/hicexplorer/tutorial.md index 96e06462b25aab..1371ef7cf9907c 100644 --- a/topics/epigenetics/tutorials/hicexplorer/tutorial.md +++ b/topics/epigenetics/tutorials/hicexplorer/tutorial.md @@ -246,7 +246,7 @@ The steps so far would have led to long run times if real data would have been u # TAD calling -“The partitioning of chromosomes into topologically associating domains (TADs) is an emerging concept that is reshaping our understanding of gene regulation in the context of physical organization of the genome” [Ramirez et al. 2017](https://doi.org/10.1101/115063). +“The partitioning of chromosomes into topologically associating domains (TADs) is an emerging concept that is reshaping our understanding of gene regulation in the context of physical organization of the genome” ({% cite Ramrez2017 %}). TAD calling works in two steps: First HiCExplorer computes a TAD-separation score based on a z-score matrix for all bins. Then those bins having a local minimum of the TAD-separation score are evaluated with respect to the surrounding bins to assign a p-value. Then a cutoff is applied to select the bins more likely to be TAD boundaries. 
@@ -383,7 +383,7 @@ The resulting image should look like this one: # Loop detection -In Hi-C data, the term `loop` refers to a 3D structure which represents enhancer-promoter, gene, architectural or polycomb-mediated interactions. These interactions have the characteristics to be enriched in a single region compared to the local background. These loops are also called long-range interactions with an expected maximum distance of 2 MB (see [Rao et al. 2014](https://doi.org/10.1016/j.cell.2014.11.021)). +In Hi-C data, the term `loop` refers to a 3D structure which represents enhancer-promoter, gene, architectural or polycomb-mediated interactions. These interactions have the characteristics to be enriched in a single region compared to the local background. These loops are also called long-range interactions with an expected maximum distance of 2 MB (see {% cite Rao2014 %}). ![Loops visualization](../../images/loops_bonev_cavalli.png) From c850e75123959b1c2fba4b821ddfd9bd05d3f017 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 17:59:32 +0200 Subject: [PATCH 26/41] adjust heading level for linter --- topics/epigenetics/tutorials/hicexplorer/tutorial.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/topics/epigenetics/tutorials/hicexplorer/tutorial.md b/topics/epigenetics/tutorials/hicexplorer/tutorial.md index 1371ef7cf9907c..f53a4f2d3ca766 100644 --- a/topics/epigenetics/tutorials/hicexplorer/tutorial.md +++ b/topics/epigenetics/tutorials/hicexplorer/tutorial.md @@ -225,7 +225,7 @@ This is often caused by bins of low counts. Use a more stringent filtering of bi This can be solved by a more stringent z-score values for the filter threshold or by a look at the plotted matrix. For example, chromosomes with 0 reads in its bins can be excluded from the correction by not defining it for the set of chromosomes that should be corrected (parameter 'Include chromosomes'). 
-### Plotting the corrected Hi-C matrix +## Plotting the corrected Hi-C matrix We can now plot chromosome 2L with the corrected matrix. @@ -240,7 +240,7 @@ We can now plot chromosome 2L with the corrected matrix. {: .hands_on} -### Load new data +## Load new data The steps so far would have led to long run times if real data would have been used. We therefore prepared a new matrix for you, `corrected contact matrix dm3 large`. Please load it into your history. From dfbbfd56f034b4b4d901ce319e4a25833afb0dca Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 18:01:27 +0200 Subject: [PATCH 27/41] fix links for linter --- .../genome-annotation/tutorials/genome-annotation/tutorial.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/topics/genome-annotation/tutorials/genome-annotation/tutorial.md b/topics/genome-annotation/tutorials/genome-annotation/tutorial.md index a19eac923b1597..c4ace35a8fda0d 100644 --- a/topics/genome-annotation/tutorials/genome-annotation/tutorial.md +++ b/topics/genome-annotation/tutorials/genome-annotation/tutorial.md @@ -117,7 +117,7 @@ At first you need to identify those structures of the genome which code for prot > Aragorn in depth > -> read more about **Aragorn** [here](https://nar.oxfordjournals.org/content/32/1/11.full.pdf+html). +> read more about **Aragorn** [in this publication](https://nar.oxfordjournals.org/content/32/1/11.full.pdf+html). {: .details} # Functional Annotation @@ -209,7 +209,7 @@ This file will be the input for more detailed analysis: > `vsearch` in depth > -> Documentation for vsearch see [here](https://github.com/torognes/vsearch). +> Documentation for vsearch available via their [GitHub repository](https://github.com/torognes/vsearch). {: .details} * **Diamond**: Diamond is a high-throughput program for aligning a file of short reads against a protein reference database such as NR, at 20,000 times the speed of Blastx, with high sensitivity. 
From 1063c66a35395a48549cac72a50210fb7861e236 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 18:03:27 +0200 Subject: [PATCH 28/41] fix heading levels --- topics/metabolomics/tutorials/lcms/tutorial.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/topics/metabolomics/tutorials/lcms/tutorial.md b/topics/metabolomics/tutorials/lcms/tutorial.md index 5729e20598d044..56bba435749ffa 100644 --- a/topics/metabolomics/tutorials/lcms/tutorial.md +++ b/topics/metabolomics/tutorials/lcms/tutorial.md @@ -214,7 +214,7 @@ Note that you can either: {: .tip} -#### Prepare your sampleMetadata file +### Prepare your sampleMetadata file The sampleMetadata file is a tab-separated table, in text format. This table has to be filled by the user. You can use any software you find appropriate to construct your table, as long as you save your file in a compatible format. For example, you can @@ -287,7 +287,7 @@ Once your sampleMetadata table is ready, you can proceed to the upload. 
In this {: .tip} -#### Upload the sampleMetada file with 'Get data' +### Upload the sampleMetada file with 'Get data' > Upload the sampleMetada > From f3b82b93442ae9a3e6a9e84ff448bdd50bdfb842 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 18:17:14 +0200 Subject: [PATCH 29/41] fix linting errors --- .../tutorials/multiplex-tissue-imaging-TMA/tutorial.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/topics/imaging/tutorials/multiplex-tissue-imaging-TMA/tutorial.md b/topics/imaging/tutorials/multiplex-tissue-imaging-TMA/tutorial.md index 6d72adafef7d53..3887310e446adc 100644 --- a/topics/imaging/tutorials/multiplex-tissue-imaging-TMA/tutorial.md +++ b/topics/imaging/tutorials/multiplex-tissue-imaging-TMA/tutorial.md @@ -33,7 +33,7 @@ Find a full [example history](https://cancer.usegalaxy.org/u/watsocam/h/gtnexemp ![Aviator screenshot, described in figure caption](../../images/multiplex-tissue-imaging-TMA/ex2_combined_avivator.png "Fully registered image of the MCMICRO Exemplar-002 Tissue microarray. Exemplar-002 consists of four cores, each with a distinct tissue organization and expression of biomarkers. In the image, there are six biomarkers shown: DNA (white), CD163 (yellow), CD3D (blue), CD31 (red), VDAC1 (green), and Keratin (orange). This image is being viewed using Avivator, an interactive tool that allows the user to selectively view channels and adjust channel intensities.") -> ### Agenda +> > > In this tutorial, we will cover: > @@ -233,7 +233,7 @@ The quantification step will produce a CSV cell feature table for every image in {: .hands_on} -# **Convert McMicro Output to Anndata** +# Convert McMicro Output to Anndata Anndata ({% cite Virshup2021 %}) is a Python package and file format schema for working with annotated data matrices that has gained popularity in the single-cell analysis community. 
Many downstream analysis tools, including Scimap from MCMICRO, Scanpy ({% cite Wolf2018 %}), and Squidpy ({% cite Palla2022 %}) are built around anndata format files (h5ad). This tool splits the marker intensity data into a separate dataframe (`X`), and places all observational data (spatial coordinates, morphological features, etc.) in the cell feature table into a separate dataframe (`obs`) that shares the same indices as `X`. In downstream analyses, new categorical variables, such as phenotype assignments for each cell, are stored in the `obs` dataframe. @@ -256,7 +256,7 @@ Learn more about this file format at the [anndata documentation](https://anndata {: .hands_on} -# Scimap: **Single Cell Phenotyping** +# Scimap: Single Cell Phenotyping There are several ways to classify cells available in Galaxy-ME. Unsupervised approaches, such as Leiden clustering, can be performed on all cells and phenotypes can be manually annotated based on marker expression patterns observed by the user. This approach is time consuming, so here we will demonstrate automated phenotyping based on thresholds of specific lineage markers using MCMICRO's Scimap. Scimap phenotyping can either be provided a table of manual gate values for each marker of interest (which can be determined using the **GateFinder** tool in Galaxy-ME), or by default, Scimap will fit a Gaussian Mixture Model (GMM) to the `log(intensity)` data for each marker to determine positive and negative populations for that marker. The marker intensity values are rescaled between (0,1) with 0.5 being the cut-off between negative and positive populations. Scimap uses a 'Phenotype workflow' to guide the classification of cells (Figure 5.). For more on how to construct a Scimap workflow, see the [Scimap documentation](https://scimap-doc.readthedocs.io/en/latest/tutorials/scimap-tutorial-cell-phenotyping/). @@ -303,7 +303,7 @@ UNetCoreograph outputs each individual core image in `tiff` format. 
Interactive {: .hands_on} -## **Rename OME-TIFF Channels** +## Rename OME-TIFF Channels Some tools can cause the channel names in an OME-TIFF image to be lost. To fix this, or to change the channel names to whatever the user prefers, the **Rename OME-TIFF Channels** tool can be invoked using a markers file similar to the one used in previous steps. From e5ad7b4d0cba3651c6a07202a3609a8a0dd6f4de Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 18:25:04 +0200 Subject: [PATCH 30/41] add citations --- .../tutorials/hicexplorer/tutorial.bib | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/topics/epigenetics/tutorials/hicexplorer/tutorial.bib b/topics/epigenetics/tutorials/hicexplorer/tutorial.bib index e69de29bb2d1d6..a16dd6263efa33 100644 --- a/topics/epigenetics/tutorials/hicexplorer/tutorial.bib +++ b/topics/epigenetics/tutorials/hicexplorer/tutorial.bib @@ -0,0 +1,24 @@ +@article{Ramrez2017, + title = {High-resolution TADs reveal DNA sequences underlying genome organization in flies}, + url = {http://dx.doi.org/10.1101/115063}, + DOI = {10.1101/115063}, + publisher = {Cold Spring Harbor Laboratory}, + author = {Ramírez, Fidel and Bhardwaj, Vivek and Villaveces, José and Arrigoni, Laura and Gr\"{u}ning, Bj\"{o}rn A. and Lam, Kin Chung and Habermann, Bianca and Akhtar, Asifa and Manke, Thomas}, + year = {2017}, + month = mar +} + +@article{Rao2014, + title = {A 3D Map of the Human Genome at Kilobase Resolution Reveals Principles of Chromatin Looping}, + volume = {159}, + ISSN = {0092-8674}, + url = {http://dx.doi.org/10.1016/j.cell.2014.11.021}, + DOI = {10.1016/j.cell.2014.11.021}, + number = {7}, + journal = {Cell}, + publisher = {Elsevier BV}, + author = {Rao, Suhas S.P. and Huntley, Miriam H. and Durand, Neva C. and Stamenova, Elena K. and Bochkov, Ivan D. and Robinson, James T. and Sanborn, Adrian L. and Machol, Ido and Omer, Arina D. and Lander, Eric S. 
and Aiden, Erez Lieberman}, + year = {2014}, + month = dec, + pages = {1665–1680} +} From 4b0d213999dc3f364a9409d554e9ffe22c519edd Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 18:26:47 +0200 Subject: [PATCH 31/41] fix link text --- topics/metabolomics/tutorials/lcms/tutorial.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/topics/metabolomics/tutorials/lcms/tutorial.md b/topics/metabolomics/tutorials/lcms/tutorial.md index 56bba435749ffa..73aae3621737a5 100644 --- a/topics/metabolomics/tutorials/lcms/tutorial.md +++ b/topics/metabolomics/tutorials/lcms/tutorial.md @@ -994,8 +994,7 @@ dataset size) and your study design. You should think carefully about what is ap In this tutorial, we will take the example of univariate analysis, using the `bmi` column of the **sampleMetadata file** as the study's biological factor investigated (body mass index). Since this variable is quantitative, we will chose in this example to measure the link between the BMI and the measured ions using a **statistical correlation calculation**. For more examples of -statistical analysis performed on LC-MS data, you can take a few minutes to watch the [usemetabo.org](https://usemetabo.org) open course video -[here](https://usemetabo.org/courses/w4mlc-ms-statistical-analysis). +statistical analysis performed on LC-MS data, you can take a few minutes to watch the [usemetabo.org](https://usemetabo.org) open [course video](https://usemetabo.org/courses/w4mlc-ms-statistical-analysis). 
## Computation of statistical indices From dc19239bdc6a333320672c9ba814b3e47d859358 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 18:28:00 +0200 Subject: [PATCH 32/41] fix headings --- topics/metabolomics/tutorials/lcms-preprocessing/tutorial.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/topics/metabolomics/tutorials/lcms-preprocessing/tutorial.md b/topics/metabolomics/tutorials/lcms-preprocessing/tutorial.md index 431a8215984e7a..c56d7c5c6a03d8 100644 --- a/topics/metabolomics/tutorials/lcms-preprocessing/tutorial.md +++ b/topics/metabolomics/tutorials/lcms-preprocessing/tutorial.md @@ -260,7 +260,7 @@ Note that you can either: {: .tip} -#### Prepare your sampleMetadata file +### Prepare your sampleMetadata file The sampleMetadata file is a tab-separated table, in text format. This table has to be filled by the user. You can use any software you find appropriate to construct your table, as long as you save your file in a compatible format. For example, you can @@ -351,7 +351,7 @@ Once your sampleMetadata table is ready, you can proceed to the upload. 
In this {: .warning} -#### Upload the sampleMetada file with 'Get data' +### Upload the sampleMetadata file with 'Get data' > Upload the sampleMetada > From d550510eda389d49c33b023ac8b62a63d845a72c Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 18:32:50 +0200 Subject: [PATCH 33/41] linter fixes --- .../tutorials/genome-annotation/tutorial.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/topics/genome-annotation/tutorials/genome-annotation/tutorial.md b/topics/genome-annotation/tutorials/genome-annotation/tutorial.md index c4ace35a8fda0d..97df870a527ad6 100644 --- a/topics/genome-annotation/tutorials/genome-annotation/tutorial.md +++ b/topics/genome-annotation/tutorials/genome-annotation/tutorial.md @@ -39,19 +39,19 @@ It consists of three main steps: # Introduction into File Formats -**FASTA** +## FASTA DNA and protein sequences are written in FASTA format where you have in the first line a ">" followed by the description. In the second line the sequence starts. ![FASTA file](../../images/fasta_format.png) -**GFF3** +## GFF3 The general feature format (gene-finding format, generic feature format, GFF) is a file format used for describing genes and other features of DNA, RNA and protein sequences. GFF3 overview -**GENBANK** +## GENBANK The genbank sequence format is a rich format for storing sequences and associated annotations. From 4e24deee2f2aaa484dd4fa0ed26c68ebc0c22174 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 18:32:54 +0200 Subject: [PATCH 34/41] linter fixes --- topics/metabolomics/tutorials/lcms/tutorial.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/topics/metabolomics/tutorials/lcms/tutorial.md b/topics/metabolomics/tutorials/lcms/tutorial.md index 73aae3621737a5..29d6f2b6e62425 100644 --- a/topics/metabolomics/tutorials/lcms/tutorial.md +++ b/topics/metabolomics/tutorials/lcms/tutorial.md @@ -874,7 +874,7 @@ to get rid of it.
> {: .hands_on} -**What transformation has this tool done to the ions' intensities?** +### What transformation has this tool done to the ions' intensities? For each ion independently, the normalisation process works as described in the following picture: From 28919663fcb2a2bc482efa9927f099e2c2ed7b92 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 18:44:08 +0200 Subject: [PATCH 35/41] add some more funding contributors as suggested by @bgruening in #4913 --- .../tutorials/galaxy-intro-101/tutorial.md | 25 +++++++++++++------ .../tutorials/galaxy-intro-short/tutorial.md | 5 +++- .../galaxy-intro-strands/tutorial.md | 12 ++++++--- 3 files changed, 30 insertions(+), 12 deletions(-) diff --git a/topics/introduction/tutorials/galaxy-intro-101/tutorial.md b/topics/introduction/tutorials/galaxy-intro-101/tutorial.md index c9c7cabfe74f5b..3304938a00c0c5 100644 --- a/topics/introduction/tutorials/galaxy-intro-101/tutorial.md +++ b/topics/introduction/tutorials/galaxy-intro-101/tutorial.md @@ -23,14 +23,23 @@ key_points: - Galaxy provides ways to share your results and methods with others subtopic: core priority: 2 -contributors: -- shiltemann -- nsoranzo -- blankclemens -- nekrut -- bgruening -- pajanne -- hexylena + +contributions: + authorship: + - shiltemann + - nsoranzo + - blankclemens + - nekrut + - bgruening + - pajanne + - hexylena + funding: + - psu + - erasmusmc + - elixir-europe + - uni-freiburg + - deNBI + recordings: - captioners: - shiltemann diff --git a/topics/introduction/tutorials/galaxy-intro-short/tutorial.md b/topics/introduction/tutorials/galaxy-intro-short/tutorial.md index a03e585d165b97..113b490a1dfa55 100644 --- a/topics/introduction/tutorials/galaxy-intro-short/tutorial.md +++ b/topics/introduction/tutorials/galaxy-intro-short/tutorial.md @@ -36,7 +36,10 @@ contributions: editing: - bebatut - ahmedhamidawan - + funding: + - AustralianBioCommons + - elixir-europe + - uni-freiburg --- # Overview diff --git 
a/topics/introduction/tutorials/galaxy-intro-strands/tutorial.md b/topics/introduction/tutorials/galaxy-intro-strands/tutorial.md index c2f91b857becb0..99b116e83eeb27 100644 --- a/topics/introduction/tutorials/galaxy-intro-strands/tutorial.md +++ b/topics/introduction/tutorials/galaxy-intro-strands/tutorial.md @@ -17,9 +17,15 @@ key_points: - "Galaxy can connect to external sources for data import and visualization purposes" - "Galaxy provides ways to share your results and methods with others" subtopic: next-steps -contributors: - - tnabtaf - - gallardoalba + +contributions: + authorship: + - tnabtaf + - gallardoalba + funding: + - elixir-europe + - deNBI + - uni-freiburg recordings: - youtube_id: P1mEWZ_tAgQ From 35b604128455b461895bfbbacda4ad1014242800 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 18:47:46 +0200 Subject: [PATCH 36/41] fix indent error --- .../introduction/tutorials/galaxy-intro-short/tutorial.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/topics/introduction/tutorials/galaxy-intro-short/tutorial.md b/topics/introduction/tutorials/galaxy-intro-short/tutorial.md index 113b490a1dfa55..e866e4285ea8b5 100644 --- a/topics/introduction/tutorials/galaxy-intro-short/tutorial.md +++ b/topics/introduction/tutorials/galaxy-intro-short/tutorial.md @@ -36,10 +36,10 @@ contributions: editing: - bebatut - ahmedhamidawan - funding: - - AustralianBioCommons - - elixir-europe - - uni-freiburg + funding: + - AustralianBioCommons + - elixir-europe + - uni-freiburg --- # Overview From 4aeca3d9f7a19247a08a3ee14032a3ecca9b2ad2 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 19:25:32 +0200 Subject: [PATCH 37/41] add gallantries as funding type contributor --- .../tutorials/flye-assembly/tutorial.md | 16 ++++++----- .../tutorials/cli-advanced/tutorial.md | 6 +++-- .../tutorials/cli-bashcrawl/tutorial.md | 8 +++--- .../tutorials/cli-basics/tutorial.md | 6 +++-- 
.../python-advanced-np-pd/tutorial.md | 4 ++- .../tutorials/python-basics/tutorial.md | 4 ++- .../tutorials/python-plotting/tutorial.md | 4 ++- .../tutorials/r-advanced/tutorial.md | 4 ++- .../tutorials/r-dplyr/tutorial.md | 10 ++++--- .../tutorials/rstudio/tutorial.md | 4 ++- .../tutorials/apollo/slides.html | 4 ++- .../tutorials/mapping/slides.html | 5 +++- .../tutorials/quality-control/slides.html | 4 ++- .../tutorials/quality-control/tutorial.md | 27 ++++++++++--------- .../tutorials/intro-to-ml-with-r/tutorial.md | 4 ++- .../rna-seq-counts-to-viz-in-r/tutorial.md | 5 +++- .../tutorials/circos/slides.html | 4 ++- .../tutorials/circos/tutorial.md | 5 +++- .../tutorials/jbrowse/tutorial.md | 4 ++- 19 files changed, 85 insertions(+), 43 deletions(-) diff --git a/topics/assembly/tutorials/flye-assembly/tutorial.md b/topics/assembly/tutorials/flye-assembly/tutorial.md index a4d1d67dbe48a2..d42407a3c530d7 100644 --- a/topics/assembly/tutorials/flye-assembly/tutorial.md +++ b/topics/assembly/tutorials/flye-assembly/tutorial.md @@ -17,13 +17,15 @@ level: Intermediate key_points: - PacBio data allows to perform good quality genome assembly - Quast and BUSCO make it easy to compare the quality of assemblies -contributors: -- abretaud -- alexcorm -- r1corre -- lleroi -- stephanierobin -- gallantries +contributions: + authorship: + - abretaud + - alexcorm + - r1corre + - lleroi + - stephanierobin + funding: + - gallantries follow_up_training: - type: internal diff --git a/topics/data-science/tutorials/cli-advanced/tutorial.md b/topics/data-science/tutorials/cli-advanced/tutorial.md index 1bf456fd3da03a..fd1ff3576e3375 100644 --- a/topics/data-science/tutorials/cli-advanced/tutorial.md +++ b/topics/data-science/tutorials/cli-advanced/tutorial.md @@ -63,12 +63,14 @@ key_points: notebook: language: bash subtopic: bash -contributors: +contributions: + authorship: - carpentries - hexylena - bazante1 - - gallantries - avans-atgm + funding: + - gallantries tags: - bash --- diff 
--git a/topics/data-science/tutorials/cli-bashcrawl/tutorial.md b/topics/data-science/tutorials/cli-bashcrawl/tutorial.md index bee6c2d6c1e949..5e79c18c1e566a 100644 --- a/topics/data-science/tutorials/cli-bashcrawl/tutorial.md +++ b/topics/data-science/tutorials/cli-bashcrawl/tutorial.md @@ -27,9 +27,11 @@ key_points: notebook: language: bash subtopic: bash -contributors: -- hexylena -- gallantries +contributions: + authorship: + - hexylena + funding: + - gallantries tags: - game - bash diff --git a/topics/data-science/tutorials/cli-basics/tutorial.md b/topics/data-science/tutorials/cli-basics/tutorial.md index 83325326dae464..4ff0f09bd755a5 100644 --- a/topics/data-science/tutorials/cli-basics/tutorial.md +++ b/topics/data-science/tutorials/cli-basics/tutorial.md @@ -63,12 +63,14 @@ key_points: notebook: language: bash subtopic: bash -contributors: +contributions: + authorship: - carpentries - hexylena - bazante1 - - gallantries - avans-atgm + funding: + - gallantries tags: - bash --- diff --git a/topics/data-science/tutorials/python-advanced-np-pd/tutorial.md b/topics/data-science/tutorials/python-advanced-np-pd/tutorial.md index b70729b78dced0..58e25b7f9dadda 100644 --- a/topics/data-science/tutorials/python-advanced-np-pd/tutorial.md +++ b/topics/data-science/tutorials/python-advanced-np-pd/tutorial.md @@ -24,10 +24,12 @@ key_points: - Python has many libraries offering a variety of capabilities, which makes it popular for beginners, as well as, more experienced users - You can use scientific libraries like Numpy and Pandas to perform data analysis. 
subtopic: python -contributors: +contributions: + authorship: - mcmaniou - fpsom - carpentries + funding: - gallantries priority: 2 diff --git a/topics/data-science/tutorials/python-basics/tutorial.md b/topics/data-science/tutorials/python-basics/tutorial.md index 64b2d0b565daca..6d1a65a0c382e4 100644 --- a/topics/data-science/tutorials/python-basics/tutorial.md +++ b/topics/data-science/tutorials/python-basics/tutorial.md @@ -23,10 +23,12 @@ key_points: - Python is a fairly easy programming language to learn and use, but be mindful of the indexing. - Python has many libraries offering a variety of capabilities, which makes it popular for beginners, as well as, more experienced users subtopic: python -contributors: +contributions: + authorship: - mcmaniou - fpsom - carpentries + funding: - gallantries priority: 1 diff --git a/topics/data-science/tutorials/python-plotting/tutorial.md b/topics/data-science/tutorials/python-plotting/tutorial.md index fa101387c0a786..0070e9a7f7649c 100644 --- a/topics/data-science/tutorials/python-plotting/tutorial.md +++ b/topics/data-science/tutorials/python-plotting/tutorial.md @@ -23,10 +23,12 @@ key_points: - Python has many libraries offering a variety of capabilities, which makes it popular for beginners, as well as, more experienced users - You can use scientific libraries like Matplotlib to perform exploratory data analysis. subtopic: python -contributors: +contributions: + authorship: - mcmaniou - fpsom - carpentries + funding: - gallantries priority: 3 diff --git a/topics/data-science/tutorials/r-advanced/tutorial.md b/topics/data-science/tutorials/r-advanced/tutorial.md index 03cb724322b940..033df963a80bb7 100644 --- a/topics/data-science/tutorials/r-advanced/tutorial.md +++ b/topics/data-science/tutorials/r-advanced/tutorial.md @@ -40,11 +40,13 @@ key_points: - Pipes can be used to combine simple operations into complex procedures. 
subtopic: r priority: 2 -contributors: +contributions: + authorship: - carpentries - bebatut - fpsom - tobyhodges + funding: - gallantries tags: - R diff --git a/topics/data-science/tutorials/r-dplyr/tutorial.md b/topics/data-science/tutorials/r-dplyr/tutorial.md index 29cfd9e26ce5ab..be0400ee66755c 100644 --- a/topics/data-science/tutorials/r-dplyr/tutorial.md +++ b/topics/data-science/tutorials/r-dplyr/tutorial.md @@ -22,10 +22,12 @@ time_estimation: 1H key_points: - Dplyr and tidyverse make it a lot easier to process data - The functions for selecting data are a lot easier to understand than R's built in alternatives. -contributors: -- hexylena -- gallantries -- avans-atgm +contributions: + authorship: + - hexylena + - avans-atgm + funding: + - gallantries subtopic: r notebook: language: r diff --git a/topics/galaxy-interface/tutorials/rstudio/tutorial.md b/topics/galaxy-interface/tutorials/rstudio/tutorial.md index d6ad9bcca1df0a..4ba154ccf1baba 100644 --- a/topics/galaxy-interface/tutorials/rstudio/tutorial.md +++ b/topics/galaxy-interface/tutorials/rstudio/tutorial.md @@ -20,10 +20,12 @@ tags: - interactive-tools key_points: - Why it's helpful to be able to work with R interactively within Galaxy -contributors: +contributions: + authorship: - bebatut - fpsom - tobyhodges + funding: - gallantries subtopic: analyse --- diff --git a/topics/genome-annotation/tutorials/apollo/slides.html b/topics/genome-annotation/tutorials/apollo/slides.html index 46d577f0839522..135b26d3854480 100644 --- a/topics/genome-annotation/tutorials/apollo/slides.html +++ b/topics/genome-annotation/tutorials/apollo/slides.html @@ -24,11 +24,13 @@ - Apollo allows a group to view and manually refine predicted genome annotations - Use Apollo to edit annotations within your group. - Export manual annotations as GFF3. 
-contributors: +contributions: + authorship: - abretaud - hexylena - nathandunn - mboudet + funding: - gallantries recordings: diff --git a/topics/sequence-analysis/tutorials/mapping/slides.html b/topics/sequence-analysis/tutorials/mapping/slides.html index cff4630af32b3c..5ddc4ed246a630 100644 --- a/topics/sequence-analysis/tutorials/mapping/slides.html +++ b/topics/sequence-analysis/tutorials/mapping/slides.html @@ -19,12 +19,15 @@ - Choice of mapper can affect downstream results - Know your data! - Genome browsers can be used to view aligned reads -contributors: +contributions: + authorship: - joachimwolff - shiltemann - EngyNasr - gallardoalba + funding: - gallantries + - elixir-europe recordings: - captioners: diff --git a/topics/sequence-analysis/tutorials/quality-control/slides.html b/topics/sequence-analysis/tutorials/quality-control/slides.html index 2559bd2c76ea02..84daf4aa3b0a31 100644 --- a/topics/sequence-analysis/tutorials/quality-control/slides.html +++ b/topics/sequence-analysis/tutorials/quality-control/slides.html @@ -20,13 +20,15 @@ - Run quality control on every sequencing dataset before any other analyses - Choose QC parameters carefully - Re-run FastQC to check the impact of the quality control -contributors: +contributions: + authorship: - bebatut - abretaud - alexcorm - lleroi - r1corre - stephanierobin + funding: - gallantries diff --git a/topics/sequence-analysis/tutorials/quality-control/tutorial.md b/topics/sequence-analysis/tutorials/quality-control/tutorial.md index 51d8629eac4fef..ab9e599a8da411 100644 --- a/topics/sequence-analysis/tutorials/quality-control/tutorial.md +++ b/topics/sequence-analysis/tutorials/quality-control/tutorial.md @@ -26,16 +26,19 @@ key_points: - Check the impact of the quality control - Different tools are available to provide additional quality metrics - For paired-end reads analyze the forward and reverse reads together -contributors: -- bebatut -- mblue9 -- alexcorm -- abretaud -- lleroi -- r1corre -- 
stephanierobin -- gallantries -- neoformit +contributions: + authorship: + - bebatut + - mblue9 + - alexcorm + - abretaud + - lleroi + - r1corre + - stephanierobin + - neoformit + funding: + - gallantries + recordings: - youtube_id: coaMGvZazoc length: 50M @@ -58,9 +61,9 @@ recordings: length: 51M galaxy_version: 24.1.2.dev0 date: '2024-09-30' - speakers: + speakers: - dianichj - captioners: + captioners: - dianichj bot-timestamp: 1727710795 diff --git a/topics/statistics/tutorials/intro-to-ml-with-r/tutorial.md b/topics/statistics/tutorials/intro-to-ml-with-r/tutorial.md index 7e19d39ea08f3e..03714aea110f33 100644 --- a/topics/statistics/tutorials/intro-to-ml-with-r/tutorial.md +++ b/topics/statistics/tutorials/intro-to-ml-with-r/tutorial.md @@ -41,8 +41,10 @@ tags: - interactive-tools key_points: - To be added -contributors: +contributions: + authorship: - fpsom + funding: - gallantries recordings: diff --git a/topics/transcriptomics/tutorials/rna-seq-counts-to-viz-in-r/tutorial.md b/topics/transcriptomics/tutorials/rna-seq-counts-to-viz-in-r/tutorial.md index a4edc1c4f9fecf..5a3ae2a853970f 100644 --- a/topics/transcriptomics/tutorials/rna-seq-counts-to-viz-in-r/tutorial.md +++ b/topics/transcriptomics/tutorials/rna-seq-counts-to-viz-in-r/tutorial.md @@ -33,10 +33,13 @@ tags: - interactive-tools key_points: - When creating plots with `ggplot2`, think about the graphics in layers (aesthetics, geometry, statistics, scale transformation, and grouping). 
-contributors: + +contributions: + authorship: - bebatut - fpsom - tobyhodges + funding: - gallantries recordings: diff --git a/topics/visualisation/tutorials/circos/slides.html b/topics/visualisation/tutorials/circos/slides.html index 7b1456b8c9c8d6..7825557f921aa2 100644 --- a/topics/visualisation/tutorials/circos/slides.html +++ b/topics/visualisation/tutorials/circos/slides.html @@ -12,9 +12,11 @@ - Circos is very powerful, but also very complex - Creating plots is an iterative process - The full configuration directory can be downloaded from Galaxy to be further tweaked locally -contributors: +contributions: + authorship: - hexylena - shiltemann + funding: - gallantries recordings: diff --git a/topics/visualisation/tutorials/circos/tutorial.md b/topics/visualisation/tutorials/circos/tutorial.md index f3259d8e0c08a0..f2f1adf7ea5a3d 100644 --- a/topics/visualisation/tutorials/circos/tutorial.md +++ b/topics/visualisation/tutorials/circos/tutorial.md @@ -13,10 +13,13 @@ time_estimation: "2h" key_points: - "Circos is an effective tool to make circular visualisation of high-dimensional datasets" - "Circos is often used for genomics, but can also be used for other types of data" -contributors: +contributions: + authorship: - shiltemann - hexylena - gallardoalba + funding: + - gallantries level: Intermediate recordings: diff --git a/topics/visualisation/tutorials/jbrowse/tutorial.md b/topics/visualisation/tutorials/jbrowse/tutorial.md index 6d7216388010d7..ada2ccc97d323d 100644 --- a/topics/visualisation/tutorials/jbrowse/tutorial.md +++ b/topics/visualisation/tutorials/jbrowse/tutorial.md @@ -18,9 +18,11 @@ level: Intermediate tags: - gmod - jbrowse1 -contributors: +contributions: + authorship: - hexylena - shiltemann + funding: - gallantries --- From d5243980ba7e70273ce241bff0771b3f50861081 Mon Sep 17 00:00:00 2001 From: Saskia Hiltemann Date: Tue, 8 Oct 2024 19:26:18 +0200 Subject: [PATCH 38/41] only show funder section if defined in metadata --- 
_layouts/tutorial_hands_on.html | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/_layouts/tutorial_hands_on.html b/_layouts/tutorial_hands_on.html index bc89cc0c770fc1..5611b51b40ca5e 100644 --- a/_layouts/tutorial_hands_on.html +++ b/_layouts/tutorial_hands_on.html @@ -291,7 +291,7 @@

You've Finished the Tutorial

function tutorial_finish() { if(typeof plausible !== 'undefined'){ // Plausible may be undefined (script blocked) - // or it may be defined, but opted-out (select box/DNT), + // or it may be defined, but opted-out (select box/DNT), // which means `plausible()` will work but not send data, *nor* execute the callback. plausible('TutorialComplete', {props: {path: document.location.pathname}}) } @@ -427,11 +427,12 @@

{{locale['citing-tutorial'] | default: "Citing this Tutorial"}}

+ {% if page.contributions and page.contributions.funding %}

{{locale['references']| default: "Funding" }}

These individuals or organisations provided funding support for the development of this resource

- {% if page.contributions %} + {% include _includes/funding-statement.md funders=page.contributions.funding %} - {% endif %} + {% endif %}