diff --git a/.eslintignore b/.eslintignore new file mode 100644 index 0000000000..639bc2b2bf --- /dev/null +++ b/.eslintignore @@ -0,0 +1 @@ +/docs/* \ No newline at end of file diff --git a/.github/workflows/.htmlhintrc b/.github/workflows/.htmlhintrc new file mode 100644 index 0000000000..4d1e298c21 --- /dev/null +++ b/.github/workflows/.htmlhintrc @@ -0,0 +1,3 @@ +{ + "head-script-disabled": false +} diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml index d33ba75e24..5ffe98db01 100644 --- a/.github/workflows/linter.yml +++ b/.github/workflows/linter.yml @@ -39,6 +39,7 @@ jobs: VALIDATE_ANSIBLE: false VALIDATE_CHECKOV: false VALIDATE_JSCPD: false + VALIDATE_LATEX: false FIX_YAML_PRETTIER: true VALIDATE_JAVASCRIPT_PRETTIER: false VALIDATE_JAVASCRIPT_STANDARD: false diff --git a/bin/convert_ncsu_excel_to_obo.pl b/bin/convert_ncsu_excel_to_obo.pl new file mode 100644 index 0000000000..990dda263a --- /dev/null +++ b/bin/convert_ncsu_excel_to_obo.pl @@ -0,0 +1,336 @@ + +=head1 NAME + +convert_excel_to_obo - a script to convert a spreadsheet based representation of an ontology to an obo file format + +=head1 DESCRIPTION + +Based on CXGN::File::Parse, this script can parse tab delimited or Excel formats (xls or xlsx) as follows: + +perl convert_excel_to_obo.pl -n CO_999 -i file.xlsx -o ontology.obo + +=head1 AUTHOR + +Lukas Mueller + +October 2024 + +=cut + +use strict; + +use utf8; +use Getopt::Std; +use Data::Dumper; +use CXGN::File::Parse; + +our ($opt_n, $opt_i, $opt_o, $opt_h); + +getopts('n:i:o:h'); + +my $file = $opt_i; +my $ontology_name = $opt_n || "GENERIC"; + +if (!$file) { + die "Please privde a file using the -i parameter."; +} + +my $outfile = $file.".obo"; +my $cvpropfile = $file.".props"; + +open(my $F, ">", $outfile) || die "Can't open file $outfile\n"; +open(my $G, ">", $cvpropfile) || die "Can't open cvprop file $cvpropfile for writing"; + +#Curation Variable ID Variable name Variable label Variable description Variable synonyms 
Context of use Growth stage Variable status Variable Xref Institution Scientist Date Language Crop Trait ID Entity Attribute Trait name Trait class Trait description Trait synonyms Main trait abbreviation Alternative trait abbreviations Trait status Trait Xref Method ID Method name Method class Method description Method Abbreviation Formula Method reference Scale ID Scale name Scale Abbreviation Scale class Scale Xref Cat 1 code Cat 1 description Cat 2 code Cat 2 description Cat 3 code Cat 3 description Cat 4 code Cat 4 description Cat 5 code Cat 5 description Cat 6 code Cat 6 description Cat 7 code Cat 7 description Cat 8 code Cat 8 description Cat 9 code Cat 9 description Cat 10 code Cat 10 description + +my @col_headers = ("Variable"," Term Name - BB", "Trait class", "Term Definition", "Variable Full Name", "Synonyms", "Trait - CO", "Main trait abbreviation", "Entity", "Attribute", "Method Name", "Method class", "Method description", "Method Abbreviation", "Formula", "Scale name", "Scale abbreviation", "Scale class", "Category 1", "Category 2", "Category 3", "Category 4", "Category 5", "Category 6", "Category 7", "Category 8", "Category 9", "Category 10", "Category 11", "Category 12" ); + +# column labels +# +my $trait_class = "Trait class"; +my $trait_name = "Trait name"; +my $trait_definition = "Trait description"; +my $trait_synonyms = "Trait synonyms"; +my $variable_synonyms = "Variable synonyms"; +my $trait_id = "Trait ID"; +my $variable_name = "Variable name"; +my $variable_definition = "Variable description"; +my $variable_label = "Variable label"; +my $variable_id = "Variable ID"; +my $method_id = "Method ID"; +my $method_name = "Method name"; +my $method_class = "Method class"; +my $method_description = "Method description"; +my $scale_id = "Scale ID"; +my $scale_name = "Scale name"; +my $scale_class = "Scale class"; +my $scale_description = "Scale description"; +my $scale_abbreviation = "Scale abbreviation"; +my $entity = "Entity"; +my $attribute = 
"Attribute"; +my $categories = "Categories"; +my $class_id = "Class ID"; +my $class_name = "Class name"; + +my $parser = CXGN::File::Parse->new( file => $file ); + +my $parsed = $parser->parse(); + +if ($parsed->{errors}) { + warn "The following errors occurred while parsing file $file: ".Dumper($parsed->{errors})."\n"; +} + +my $data = $parsed->{data}; + +# get all the trait classes +# +my %trait_classes; +my %traits; +my %variables; + +foreach my $d (@$data) { + $trait_classes{$d->{$trait_class}}->{count}++; +} +print STDERR "TRAIT CLASSES: ".Dumper(\%trait_classes); + +foreach my $d (@$data) { + my $tn = $d->{$trait_name}; + print STDERR "Parsing TRAIT NAME $trait_name\n"; + if (! $tn) { next; } + $traits{$tn}->{$trait_id} = $d->{$trait_id}; + $traits{$tn}->{$trait_class} = $d->{$trait_class}; + + print STDERR "TRAIT NAME $trait_name has TRAIT CLASS $d->{$trait_class}\n"; + + $traits{$tn}->{$trait_definition} = $d->{$trait_definition}; +} + +print STDERR "TRAITS: ".Dumper(\%traits); + + + +foreach my $d (@$data) { + my $vn = $d->{$variable_name}; + if (! 
$vn) { next; } + $variables{$vn}->{$variable_id} = $d->{$variable_id}; + $variables{$vn}->{$variable_synonyms} = $d->{$variable_synonyms}; + $variables{$vn}->{$trait_name} = $d->{$trait_name}; + $variables{$vn}->{$trait_definition} = $d->{$trait_definition}; + $variables{$vn}->{$entity} = $d->{$entity}; + $variables{$vn}->{$attribute} = $d->{$attribute}; + $variables{$vn}->{$method_name} = $d->{$method_name}; + $variables{$vn}->{$scale_abbreviation} = $d->{$scale_abbreviation}; + $variables{$vn}->{$variable_label} = $d->{$variable_label}; + $variables{$vn}->{$scale_name} = $d->{$scale_name}; + $variables{$vn}->{$scale_class} = $d->{$scale_class}; + $variables{$vn}->{$categories} = $d->{$categories}; + print STDERR "TERM NAME - CO IN variable = $d->{$trait_name}\n"; + $variables{$vn}->{$trait_name} = $d->{$trait_name}; +} +print STDERR "VARIABLES: ".Dumper(\%variables); + + +my $root_id = format_ontology_id($opt_n, 0); +my $count = $root_id; +my $acc = sprintf "%07d", $count; # the number after the ontology name and a colon + +print STDERR "Starting at term $ontology_name:$acc ...\n"; + +# write obo header +# +print $F <; + +my $root_acc = $acc; +my $root_name = "ROOT"; + +print $F <{acc} = $class_id; + #$trait_classes{$k}->{name} = $k; + + $count++; + +} + +foreach my $k (sort keys %traits) { + print $F format_trait( + $ontology_name, + $traits{$k}->{$trait_id}, + $traits{$k}->{$variable_name}, + $traits{$k}->{$trait_definition}, + $traits{$k}->{$trait_synonyms}, + $trait_classes{ $traits{$k}->{$trait_class} }->{acc}, # parent id + $traits{$k}->{$trait_class}, # parent trait + )."\n"; + + $traits{$k}->{name} = $traits{$k}->{$trait_name}; + $traits{$k}->{acc} = $traits{$k}->{$trait_id}; + $count++; +} + + + +foreach my $k (sort keys %variables) { + + my $parent_trait = $variables{$k}->{$trait_name}; + my $parent_trait_id = $traits{$variables{$k}->{'Trait - CO'}}->{acc}; + my $parent_trait_name = $traits{ $variables{$k}->{'Trait -CO'}}->{name}; + + print STDERR 
"VARIABLE: $k. PARENT TRAIT: $parent_trait\n"; + + print $F format_variable( + $ontology_name, + $count, + $k, ###$variables{$k}->{'Variable Full Name'}, + join(" - ", $variables{$k}->{'Term Definition'}), + $variables{$k}->{'Synonym'}, + $traits{$variables{$k}->{'Trait - CO'}}->{acc}, # parent trait id + $traits{$variables{$k}->{'Trait - CO'}}->{name}, # parent trait + + )."\n"; + + print $G format_props( + $k, # variable name + $ontology_name, + $count, + $variables{$k}->{'Scale class'}, + $variables{$k}->{Categories}, + ); + + $count++; + +} + +close($F); +close($G); + +print STDERR "Script completed.\n"; + +sub format_props { + my $trait_name = shift; + my $ontology_name = shift; + my $count = shift; + my $trait_format = shift; + my $categories = shift; + + my $trait_default_value = shift; + my $trait_minimum = shift; + my $trait_maximum = shift; + my $trait_details = shift; + + return join ("\t", $trait_name."|".format_ontology_id($ontology_name, $count), $trait_format, $trait_default_value, $trait_minimum, $trait_maximum, $categories, $trait_details)."\n"; + + +} + + +sub format_ontology_id { + my $ontology_name = shift; + my $acc = shift; + + return $ontology_name.":".sprintf "%07d", $acc; +} + +sub format_trait { + my $ontology_code = shift; + my $id = shift; + my $name = shift; + my $description = shift; + my $synonyms = shift; + my $parent_class_id = shift; + my $parent_trait = shift; + + my $trait_id = format_ontology_id($ontology_code, $id); + my $parent_trait_id = format_ontology_id($ontology_code, $parent_class_id); + + my %record = ( + "[Term]" => "", + "id:" => $trait_id, + "name:" => $name, + "def:" => "\"$description\" []", + "synonym:" => $synonyms, + "namespace:" => $ontology_name, + "is_a:" => "$parent_trait_id ! 
$parent_trait", + ); + + my $data = ""; + foreach my $k ("[Term]", "id:", "name:", "def:", "synonym:", "namespace:", "is_a:") { + if (defined($record{$k})) { + $data .= "$k $record{$k}\n"; + } + } + + return $data; +} + + +sub format_variable { + my $ontology_code = shift; + my $id = shift; + my $name = shift; + my $description = shift; + my $synonyms = shift; + my $parent_trait_id = shift; + my $parent_trait_name = shift; + + #print STDERR "Parent trait name: $parent_trait_name\n"; + + my $variable_id = format_ontology_id($ontology_code, $id); + my $parent_trait_id = format_ontology_id($ontology_code, $parent_trait_id); + my %record = ( + "[Term]" => "", + "id:" => $variable_id, + "name:" => $name, + "def:"=> "\"$description\" []", + "synonym:" => $synonyms, + "namespace:" => $ontology_name, + "relationship:" => "variable_of $parent_trait_id ! $parent_trait_name", + ); + + my $data = ""; + foreach my $k ("[Term]", "id:", "name:", "def:", "synonym:", "namespace:", "relationship:") { + if (defined($record{$k})) { + $data .= "$k $record{$k}\n"; + } + } + + return $data; +} diff --git a/bin/load_trait_props.pl b/bin/load_trait_props.pl index 915304d38e..81b6fe8837 100755 --- a/bin/load_trait_props.pl +++ b/bin/load_trait_props.pl @@ -28,18 +28,21 @@ =head2 DESCRIPTION trait_maximum trait_categories trait_details + trait_repeat_type trait_name: the name of the variable human readable form (e.g., "plant height in cm") trait_format: can be numeric, qualitative, date or boolean trait_default_value: is the value if no value is given trait_categories: are the different possible names of the categories, separated by /, for example "1/2/3/4/5" trait_details: string describing the trait categories + trait_repeat_type: one of 'single', 'multiple', 'time_series' =head2 AUTHOR -Jeremy D. Edwards (jde22@cornell.edu) + Jeremy D. 
Edwards (jde22@cornell.edu) - initial script, April 2014 + Lukas Mueller (lam87@cornell.edu) - added trait_repeat_type, Feb 2024 + -April 2014 =head2 TODO diff --git a/db/00186/AddTraitPropRepeatType.pm b/db/00186/AddTraitPropRepeatType.pm new file mode 100644 index 0000000000..855583ac32 --- /dev/null +++ b/db/00186/AddTraitPropRepeatType.pm @@ -0,0 +1,102 @@ +#!/usr/bin/env perl + + +=head1 NAME + + AddTraitPropRepeatType + +=head1 SYNOPSIS + +mx-run AddTraitPropRepeatType [options] -H hostname -D dbname -u username [-F] + +this is a subclass of L +see the perldoc of parent class for more details. + +=head1 DESCRIPTION + +This dbpatch adds the trait_repeat_type property to the trait_property cv. + + +=head1 AUTHOR + +Lukas Mueller + +=head1 COPYRIGHT & LICENSE + +Copyright 2024 Boyce Thompson Institute for Plant Research + +This program is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=cut + + +package AddTraitPropRepeatType; + +use Moose; +use Bio::Chado::Schema; +use Try::Tiny; + +extends 'CXGN::Metadata::Dbpatch'; + + +has '+description' => ( default => <<'' ); +Description of this patch goes here + +has '+prereq' => ( + default => sub { [] + }, + ); + +sub patch { + my $self=shift; + + print STDOUT "Executing the patch:\n " . $self->name . ".\n\nDescription:\n ". $self->description . ".\n\nExecuted by:\n " . $self->username . 
" ."; + + print STDOUT "\nChecking if this db_patch was executed before or if previous db_patches have been executed.\n"; + + print STDOUT "\nExecuting the SQL commands.\n"; + + my %cvterms = ( + 'trait_property' => [ 'trait_repeat_type' ], + ); + + + my $schema = Bio::Chado::Schema->connect( sub { $self->dbh->clone } ); + + + my $coderef = sub { + + + foreach my $cv_name ( keys %cvterms ) { + print "\nKEY = $cv_name \n\n"; + my @cvterm_names = @{$cvterms{ $cv_name } } ; + + foreach my $cvterm_name ( @cvterm_names ) { + print "cvterm= $cvterm_name \n"; + my $new_cvterm = $schema->resultset("Cv::Cvterm")->create_with( + { + name => $cvterm_name, + cv => $cv_name, + }); + } + } + }; + + try { + $schema->txn_do($coderef); + + } catch { + die "Load failed! " . $_ . "\n" ; + }; + + + + +print "You successfully added the new property 'trait_repeat_type'!\n"; +} + + +#### +1; # +#### diff --git a/docs/404.html b/docs/404.html index 462aea4fef..c18ea74afd 100644 --- a/docs/404.html +++ b/docs/404.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/BreedbaseManual.pdf b/docs/BreedbaseManual.pdf index 9058263ba4..6010125e0b 100644 Binary files a/docs/BreedbaseManual.pdf and b/docs/BreedbaseManual.pdf differ diff --git a/docs/BreedbaseManual.tex b/docs/BreedbaseManual.tex index 4878f382d2..78c6e2d991 100644 --- a/docs/BreedbaseManual.tex +++ b/docs/BreedbaseManual.tex @@ -85,7 +85,7 @@ \title{User Manual of Breedbase} \author{Breedbase team} -\date{2024-12-13} +\date{2025-01-23} \begin{document} \maketitle @@ -1741,6 +1741,13 @@ \subsection{Uploading GPS Coordinates For Plots}\label{uploading-gps-coordinates This dialog tells you that the file must be XLS or XLSX and must contain: plot\_name WGS84\_bottom\_left\_x WGS84\_bottom\_left\_y WGS84\_bottom\_right\_x WGS84\_bottom\_right\_y WGS84\_top\_right\_x WGS84\_top\_right\_y WGS84\_top\_left\_x WGS84\_top\_left\_y The GPS coordinates should be WGS84 format and specify a four-pointed polygon around the plot. +\hypertarget{repetitive-measurements-section}{% +\subsection{Repetitive Measurements Section}\label{repetitive-measurements-section}} + +If a trial includes repetitive traits or time-series values, you can effectively view and analyze these values through the Repetitive Measurements Section. Start by selecting the desired trait from the trait drop-down menu. Next, define the date range by either using the date-range picker or an interactive slider, which allows you to dynamically adjust the period you wish to examine. Once the date range is set, determine how to handle the repetitive measurements by choosing from various options such as First Value, Last Value, Averaged Value, Sum Values, or All Values. Choosing the ``All Values'' option enables an additional feature that visualizes the trend of the values over time, helping you identify patterns and trends within the data. 
+ +\begin{center}\includegraphics[width=0.95\linewidth]{assets/images/trial_detail_page_view_repetitive_measurements} \end{center} + \hypertarget{uploading-additional-files-to-trial}{% \subsection{Uploading Additional Files To Trial}\label{uploading-additional-files-to-trial}} diff --git a/docs/assets/images/trial_detail_page_view_repetitive_measurements.png b/docs/assets/images/trial_detail_page_view_repetitive_measurements.png new file mode 100644 index 0000000000..903be8d86b Binary files /dev/null and b/docs/assets/images/trial_detail_page_view_repetitive_measurements.png differ diff --git a/docs/assets/images/wizard_related_phenotypes_download.png b/docs/assets/images/wizard_related_phenotypes_download.png index 0829653ab4..c2ca9c3314 100644 Binary files a/docs/assets/images/wizard_related_phenotypes_download.png and b/docs/assets/images/wizard_related_phenotypes_download.png differ diff --git a/docs/basic-website-usage.html b/docs/basic-website-usage.html index ebc29fa128..7deb4ae9d5 100644 --- a/docs/basic-website-usage.html +++ b/docs/basic-website-usage.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/data-analysis-tools.html b/docs/data-analysis-tools.html index d8e65eae58..6520e8d558 100644 --- a/docs/data-analysis-tools.html +++ b/docs/data-analysis-tools.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/index.html b/docs/index.html index a008ebce95..8d1971e96d 100644 --- a/docs/index.html +++ b/docs/index.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • @@ -358,7 +359,7 @@

    Introduction

    diff --git a/docs/managing-accessions.html b/docs/managing-accessions.html index b9e0cf3df9..83fd05e3be 100644 --- a/docs/managing-accessions.html +++ b/docs/managing-accessions.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/managing-barcodes.html b/docs/managing-barcodes.html index 3543822a2c..a6f6392d9a 100644 --- a/docs/managing-barcodes.html +++ b/docs/managing-barcodes.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/managing-breeding-programs.html b/docs/managing-breeding-programs.html index 2ea1c6e6c7..9a078e83bb 100644 --- a/docs/managing-breeding-programs.html +++ b/docs/managing-breeding-programs.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/managing-crosses.html b/docs/managing-crosses.html index 9b37435ccf..d4160fb677 100644 --- a/docs/managing-crosses.html +++ b/docs/managing-crosses.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/managing-downloads.html b/docs/managing-downloads.html index d566d0ade8..074160869c 100644 --- a/docs/managing-downloads.html +++ b/docs/managing-downloads.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/managing-field-trials.html b/docs/managing-field-trials.html index 9b1b31c0ba..7b1bf7e76d 100644 --- a/docs/managing-field-trials.html +++ b/docs/managing-field-trials.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • @@ -796,8 +797,13 @@

    10.2.11 Uploading GPS Coordinates

    This dialog tells you that the file must be XLS or XLSX and must contain: plot_name WGS84_bottom_left_x WGS84_bottom_left_y WGS84_bottom_right_x WGS84_bottom_right_y WGS84_top_right_x WGS84_top_right_y WGS84_top_left_x WGS84_top_left_y The GPS coordinates should be WGS84 format and specify a four-pointed polygon around the plot.

    -
    -

    10.2.12 Uploading Additional Files To Trial

    +
    +

    10.2.12 Repetitive Measurements Section

    +

    If a trial includes repetitive traits or time-series values, you can effectively view and analyze these values through the Repetitive Measurements Section. Start by selecting the desired trait from the trait drop-down menu. Next, define the date range by either using the date-range picker or an interactive slider, which allows you to dynamically adjust the period you wish to examine. Once the date range is set, determine how to handle the repetitive measurements by choosing from various options such as First Value, Last Value, Averaged Value, Sum Values, or All Values. Choosing the “All Values” option enables an additional feature that visualizes the trend of the values over time, helping you identify patterns and trends within the data.

    +

    +
    +
    +

    10.2.13 Uploading Additional Files To Trial

    It may be of interest to you to upload additional documents, images, or recordings to your trial. To do this, scroll down to the “Uploaded Additional File” section on the trial detail page. From here you can view and download any of these additional files.

    To upload an additional file, click on the “Upload Additional Files” link. A dialog will appear where you simply select your desired file. For information, you can click “Upload information” to see the following message.

    diff --git a/docs/managing-genotyping-plates.html b/docs/managing-genotyping-plates.html index 9dfed2da3c..ff687b4f9c 100644 --- a/docs/managing-genotyping-plates.html +++ b/docs/managing-genotyping-plates.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/managing-image-data.html b/docs/managing-image-data.html index 6fd3a801ed..731ab2fecb 100644 --- a/docs/managing-image-data.html +++ b/docs/managing-image-data.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/managing-locations.html b/docs/managing-locations.html index 554dc1a561..ba58439b5d 100644 --- a/docs/managing-locations.html +++ b/docs/managing-locations.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/managing-observation-variables.html b/docs/managing-observation-variables.html index 8f0cba47ee..79d262f281 100644 --- a/docs/managing-observation-variables.html +++ b/docs/managing-observation-variables.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/managing-odk-data-collection.html b/docs/managing-odk-data-collection.html index 0a168e9be7..29440b66bf 100644 --- a/docs/managing-odk-data-collection.html +++ b/docs/managing-odk-data-collection.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/managing-outliers-in-dataset.html b/docs/managing-outliers-in-dataset.html index 73e94a2bd8..6ace482544 100644 --- a/docs/managing-outliers-in-dataset.html +++ b/docs/managing-outliers-in-dataset.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/managing-phenotypic-data.html b/docs/managing-phenotypic-data.html index e8cf964289..c92419fb03 100644 --- a/docs/managing-phenotypic-data.html +++ b/docs/managing-phenotypic-data.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/managing-populations.html b/docs/managing-populations.html index 1b01a31191..6817ac37a1 100644 --- a/docs/managing-populations.html +++ b/docs/managing-populations.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/managing-seed-lots.html b/docs/managing-seed-lots.html index 28108d2a9f..8995143405 100644 --- a/docs/managing-seed-lots.html +++ b/docs/managing-seed-lots.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/managing-sequence-metadata.html b/docs/managing-sequence-metadata.html index e3ab468934..ba013f5108 100644 --- a/docs/managing-sequence-metadata.html +++ b/docs/managing-sequence-metadata.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/managing-spectral-data.html b/docs/managing-spectral-data.html index 36b5dc1d39..8993ddc92d 100644 --- a/docs/managing-spectral-data.html +++ b/docs/managing-spectral-data.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/managing-tissue-samples.html b/docs/managing-tissue-samples.html index 630cd3bd70..05976ead9f 100644 --- a/docs/managing-tissue-samples.html +++ b/docs/managing-tissue-samples.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/managing-user-roles.html b/docs/managing-user-roles.html index 39280f612d..e0581d5daf 100644 --- a/docs/managing-user-roles.html +++ b/docs/managing-user-roles.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/managing-vcf-data.html b/docs/managing-vcf-data.html index 2fcbc2d7d0..59f799dca3 100644 --- a/docs/managing-vcf-data.html +++ b/docs/managing-vcf-data.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/r_markdown_docs/assets/images/trial_detail_page_view_repetitive_measurements.png b/docs/r_markdown_docs/assets/images/trial_detail_page_view_repetitive_measurements.png new file mode 100644 index 0000000000..903be8d86b Binary files /dev/null and b/docs/r_markdown_docs/assets/images/trial_detail_page_view_repetitive_measurements.png differ diff --git a/docs/r_markdown_docs/assets/images/wizard_related_phenotypes_download.png b/docs/r_markdown_docs/assets/images/wizard_related_phenotypes_download.png index 0829653ab4..c2ca9c3314 100644 Binary files a/docs/r_markdown_docs/assets/images/wizard_related_phenotypes_download.png and b/docs/r_markdown_docs/assets/images/wizard_related_phenotypes_download.png differ diff --git a/docs/r_markdown_docs/managing_field_trials.Rmd b/docs/r_markdown_docs/managing_field_trials.Rmd index b0876c22bc..543650f377 100644 --- a/docs/r_markdown_docs/managing_field_trials.Rmd +++ b/docs/r_markdown_docs/managing_field_trials.Rmd @@ -585,6 +585,14 @@ knitr::include_graphics('assets/images/trial_detail_page_add_plot_gps_upload_inf This dialog tells you that the file must be XLS or XLSX and must contain: plot_name WGS84_bottom_left_x WGS84_bottom_left_y WGS84_bottom_right_x WGS84_bottom_right_y WGS84_top_right_x WGS84_top_right_y WGS84_top_left_x WGS84_top_left_y The GPS coordinates should be WGS84 format and specify a four-pointed polygon around the plot. +### Repetitive Measurements Section + +If a trial includes repetitive traits or time-series values, you can effectively view and analyze these values through the Repetitive Measurements Section. Start by selecting the desired trait from the trait drop-down menu. Next, define the date range by either using the date-range picker or an interactive slider, which allows you to dynamically adjust the period you wish to examine. 
Once the date range is set, determine how to handle the repetitive measurements by choosing from various options such as First Value, Last Value, Averaged Value, Sum Values, or All Values. Choosing the "All Values" option enables an additional feature that visualizes the trend of the values over time, helping you identify patterns and trends within the data. + +```{r echo=FALSE, out.width='95%', fig.align='center'} +knitr::include_graphics('assets/images/trial_detail_page_view_repetitive_measurements.png') +``` + ### Uploading Additional Files To Trial It may be of interest to you to upload additional documents, images, or recordings to your trial. To do this, scroll down to the "Uploaded Additional File" section on the trial detail page. From here you can view and download any of these additional files. diff --git a/docs/search_index.json b/docs/search_index.json index 3666837b8c..39f8fe6a66 100644 --- a/docs/search_index.json +++ b/docs/search_index.json @@ -1 +1 @@ -[["index.html", "User Manual of Breedbase Introduction", " User Manual of Breedbase Breedbase team 2024-12-13 Introduction Welcome to the Breedbase manual! Use the table of contents in the left sidebar to navigate to the topic of your choice. At any time you can select specific text in the manual to highlight or annotate it using Hypothesis. Open the Hypothesis sidebar on the right to view existing annotations. You may also use the widgets at the top of the screen to: - collapse the sidebar - search for a specific topic - change the font size, font type, or the site theme - download the manual as a pdf The manual can also be downloaded as a pdf here. Download This manual is intended for database users. 
If you are a developer looking for software implementation details, please visit the developer wiki instead: https://github.com/solgenomics/sgn/wiki "],["basic-website-usage.html", "Chapter 1 Basic Website Usage 1.1 Creating a User Account 1.2 Managing your Account 1.3 Menu Layout 1.4 Working with Lists 1.5 User Permissions", " Chapter 1 Basic Website Usage 1.1 Creating a User Account 1.1.1 Verifying first that you do not already have an account Before creating an account, please verify first that you dont already have an account. You can use Search menu to check if you already registered as a user. In the Search menu, selecting the People tab and search your name. If nothing is found, proceed with the instructions below. Otherwise, clicking the Login button. If you have forgotten your password, you can retrieve it by clicking the Forgot your password? link on the login page. 1.1.2 Creating a user account On the right of the toolbar, clicking on Login. It will take you to the login page. On the login page, clicking on the link sign up for an account. It will take you to the page below: Filling in all of the information, then clicking Create Account. After you submit the information, an email will be sent to the provided email address. Checking your email and clicking on the link to activate your account. 1.2 Managing your Account 1.2.1 Login To login, clicking the Login link in the toolbar on any page and enter your username and password. If you have forgotten your password, you can retrieve it by clicking the Forgot your password? link on the login page. 1.2.2 Editing Account Settings Account settings can be edited by clicking on the my profile link displayed as your user name, on the right of the toolbar. You must login, in order to access and change account settings. You can add personal information to your account using the View or update personal information link. 
To change your password, username, or your contact email, clicking on Update account information link. You must provide your old password before you can make any changes. 1.2.3 Changing Your Account Status: From User to Submitter After you create an account, your account has a user status. This account has limited privileges. Accounts with user status are able to: Change personal information Post comments on pages Post to the forum To upgrade your account status to submitter, contact the database curators using the contact link provided at the footer of each page. Submitter accounts can add data, such as new plots, accessions, phenotype data and images. 1.2.4 Submitting Feedback on an SGN Database We appreciate your feedback! Feel free to submit any questions or suggestions by using the Feedback link provided at the footer of each page. 1.3 Menu Layout SGN Database websites have a toolbar on the top of each page with a number of menus for convenient access of major functions. The menus, as pictured below, are search, manage, analyze, and maps. The toolbar also provides a quick search, a log in button, and a new user button. 1.3.1 Menu Options Search In the Search menu, the options are: Tab Description Wizard Search different accessions and plots by location, year, trial, and trait data. Can also be used to create lists of different types. Accession and plots Search accessions and plots using a variety of criteria Trials Search trials by name, description, breeding program, year, location, and trial type. Markers Search different markers Images Search images contained in the SGN database People Search database users Manage In the Manage menu, the options are: Tab Description Breeding Programs View, add and delete breeding programs Locations View, add and delete locations Accessions Manage and search different accessions Seedlots Manage and search different seedlots Crosses Create new crosses in the database Field Trials Manage field trials. 
Create trials using different field layouts. Genotyping Plates Manage genotyping plates. Create 96 or 384 well plates. Phenotyping Upload phenotyping files from the Tablet Field Book application Field Book App Manage the field book app data (download files to tablet) Barcodes Refers to the old barcode system, mainly historical Download Download information in the database based on lists Analyze Clicking on the Analyze link will give a full menu of all analysis functions In the Analyze menu, the options are: Tab Description Breeder Tools Breeder Home Access breeding functionalities. Lists important and helpful links. Barcode Tools Manage, create, and download barcodes. Also access barcode tools. Genomic Selection Can search for traits, start building a GS model, and predict values based on genotypes Sequence Analysis BLAST Sequence homology search Other Ontology Browser Browse all recorded ontologies 1.4 Working with Lists Lists are collections of identifiers that are stored in the database. Lists can be composed of accessions, plots, traits, locations, and trials. Lists are attached to the individual users account, and can only be created and seen by the user while logged in. SGN databases make heavy use of lists in a number of tools on the website. For example, trials are created using lists of accessions. 1.4.1 Creating lists Lists can be generated in various ways: One way to create a list is by clicking on the Lists link located on the toolbar. To create a new list, enter the name of your new list and then clicking on the New List button. The name of the list can be anything, but should be unique and should be something to help you easily identify. You can find the list that you entered on the Your Lists page. To add items to your list, click on the View icon to open List Contents page. On the List Contents page, enter items that you want to add to the list, then click on Add button. 
The page will be updated and will display your items in a table at the bottom of the page. It is possible to sort the list if you need. Select the type of items in your list. To verify that the items that you added to your list are already stored in the database and that you selected a correct type for the items, click on the Validate button. If those items are already in the database, a message will indicate that This list passed validation Note that a list cannot contain duplicate elements. If a duplicate item is entered, the list manager will inform the user that the element is already in the list and will not add it again. Another easy way to create a list is to use 2.1, which can be accessed from the Search menu. 1.4.2 Viewing and editing lists Lists can be viewed and edited using the Lists link on the toolbar. Clicking on the link will open a window that displays all of your lists, as well as an option to create new lists. This page shows all lists that have been created, including those created by using the Search Wizard. You can view and edit your lists by using Actions buttons. Clicking on the view icon will open a new window called List Contents that allows you to change the list name, the type of the list, add new items, or delete existing items. Clicking on the delete icon will delete your list. Caution: this action cannot be undone. Clicking on the download icon will download the contents of your list to your computer. Clicking on the make public icon will make your list available for other users to view and use your list. 1.5 User Permissions Breedbase accounts are assigned one or more of four different roles to determine the level of access they have within the database. The possible roles are User, Submitter, Sequencer, and Curator. Each role grants specific permissions, and careful management of them helps prevent data from being altered or deleted in error. 
Accounts are also assigned Breeding Program role(s) to grant access to the specific breeding program(s) they work with. The User role gives an account permission to view and download data throughout the database. The Submitter role gives an account permission to design field experiments and to upload and edit data using the tools in the Manage section. In order to submit and manage breeding data within a given breeding program, a submitter also must have a matching Breeding Program role. The Sequencer role gives an account permission to design genotyping experiments and submit plates to a genotyping service. The Curator role gives an account permission to do all of the above, as well as to delete data within the database. The Curator role also enables the addition or deletion of roles for all database accounts in the Manage User Roles tool. "],["searching-the-database.html", "Chapter 2 Searching the Database 2.1 The Search Wizard 2.2 Accessions and Plot Search 2.3 Trials Search 2.4 Trait Search 2.5 Ontology Browser 2.6 Search Seedlots", " Chapter 2 Searching the Database You can search for information on the database by using the following search options: Wizard, which uses combined criteria specified by users; Accessions and Plots; Trials; Markers; Images; People; FAQ. 2.1 The Search Wizard 2.1.1 How the Search Wizard Works The search wizard presents a number of select boxes, which are initially empty. You start searching by picking a category of data from the dropdown above the left-most select box. Once a category has been picked, the database will retrieve all the options within this category and display them within the first select box. You then select one or more options from the first select box, which activates the second dropdown. You can then select a category from the second dropdown, and repeat this same search process through all four dropdowns and select boxes. In the example above, the locations category was chosen in the first dropdown. 
The first select box then displayed all the possible locations in the database. The option Ibadan was selected. This activated the second dropdown. The category years was chosen in the second dropdown. The second select box then displayed all the years that are linked in the database to the location Ibadan. From that list, the options 2011 and 2012 were selected. This activated the third dropdown. A final category, accessions, was chosen in the third dropdown. The third select box was then populated with the 3847 accessions in the database that are linked with the location Ibadan in the years 2011 or 2012. In addition to the basic search operations demonstrated above, users can take advantage of two more features: Load Selection from List Instead of picking a category in the first dropdown, users can instead populate the first selectbox from a list by scrolling down in the first dropdown to the Load Selection from List subheading and selecting a list. This is useful for starting queries with a list of plots, as this category is not among the options in the first dropdown. ANY/MIN/ALL Toggle By default, the search wizard combines options within a category using an OR query. In the example above, in the third panel the wizard retrieved accessions associated with the location Ibadan in ANY of the years 2011 OR 2012 If the user clicked the toggle below the second select box to change it to ALL before choosing accessions in the third dropdown, the wizard would instead retrieve accessions associated with the location Ibadan in the years 2011 AND 2012. This will be a smaller set of accessions, because any accessions used only in 2011, or only in 2012 will be excluded. A more advanced search could use the MIN toggle option. This allows the user to make a query in between an ANY or ALL query, where a minimum number of matches from the selected column will be used as a filter for the next column. 
The minimum can be provided as either a percentage (%) or an actual count of items (#). In the example above, if the years 2011, 2012, and 2013 were selected in the second column, the user could enter 2 in as the minimum and select # as the minimum match type. This would select accessions in the third column that were used in 2 or more of the selected years. 2.1.2 How to use retrieved data Getting more Info Any option in the wizard select boxes (except for years) can be clicked to open a page with more details. The new page is opened in a new tab. Saving to a list You can store the highlighted items in any selected box to lists. This is done using the inputs and buttons directly below the select box. Dont forget, you must be logged in to work with lists! To add items to an existing list, first pick an existing list using the Add to List dropdown on the left. Then click the Add button. A popup window will confirm the action, and display the number of items added to your existing list. To store items to a new list, first type a new list name in the Create New List text input on the left. Then click on the Create button. A popup window will confirm the action, and display the number of items added to your new list. Downloading Data You can download trial metadata, phenotypes and genotypes associated with the highlighted items in the wizard select boxes. This is done using the buttons in the download section at the bottom of the page. Dont forget, you must be logged in to download data! Metadata Trial metadata can be downloaded by selecting a subset of trials from the database or based on your search categories. To download, click on Related Trial Metadata, a dialog will appear. Select download format and click the Metadata button to complete your download. Phenotypes The phenotypes download is quite flexible, and can download a subset of all the trial data in the database based on whichever categories and options you currently have selected. 
Simply click on the Related Trial Phenotypes link, review the options, changing or adding any additional parameters you like, then click Download Phenotypes. Genotypes The genotype download is more stringent. It requires a minimum of one accession and one genotyping protocol to be selected in the wizard select boxes. The text box in the download section of the page will help track what has been selected. Once clicked, the Download Genotypes button will download a genotype file for the selected accessions. Saving the wizard selections As discussed above, the selections of the individual select boxes in the wizard can be saved separately to a list. The lists can be used as inputs in other tools on the site. However, sometimes creating a selection is quite time consuming and restoring the selections from four different lists would be cumbersome too. Therefore, the selections can be saved together in a dataset, and named for later retrieval. This is done in the section Load/Create Datasets that is below the first two wizard select boxes. To select an existing dataset, one uses the Load Dataset dropdown. A particular dataset can be chosen, and the Load button can be clicked to retrieve and display the dataset in the wizard. To create a new dataset using items that are selected in the wizard, one can enter the name of the new dataset in the Create New Dataset text box. Once the dataset has been given a name, clicking the Create button will save the new dataset. 2.1.3 Updating the Wizard The search wizard uses a copy of the database, or a cache, to return results quickly. If data appears to be missing, it usually means that the cache needs to be updated. Users with submitter privileges or above can do this using the Update Wizard button. One can also use the Refresh Lists button to update the available lists. This will take just a few seconds in small databases, but may take a few hours to complete in larger databases. 
2.2 Accessions and Plot Search Accessions and their related materials (cross, plant, plot, population, tissue_sample, training population) can be searched by using Search Accessions and Plots page. On this page, accession is the default stock type; however, you can change stock type by selecting an option from the drop-down list. From this page you can construct detailed queries for stock types. For example, by using the Usage section, the Properties section, and the Phenotypes section you could search for accessions which were diploids used in a specific year and location and were also phenotyped for height. You can also search for accessions based on genetic properties, such as the location of an introgression on a specific chromosome. It is possible to query over any of the available properties, such as ploidy_level, country of origin, introgression_chromosome, etc. In the search result table it is possible to select any of the available properties to view. At the bottom of the accession search there is a phenotype graphical filtering tool. Here you can filter down accessions based on combinations of trait performance. The filtered down accessions are then able to be saved to a list. For information on adding Accessions please see the Managing Accessions help. For information on how field trial plots, plants, tissue samples, and subplots are added to the database, please see the Managing Field Trials help. 2.3 Trials Search Trials on the database can be searched based on trial name, description, breeding program, year, location, trial type, design, planting date, and harvest date. 2.4 Trait Search On the Trait Search page (menu item Search > Traits), traits in the database can be searched by ID, name, or description. Optionally, a starting list of traits can be selected to filter down results. Selecting traits in the results of the search allows one to add the selected results to a trait list, or create a new trait list from the selected results. 
2.5 Ontology Browser A more advanced tool for searching for Traits is the ontology browser, available by clicking on Analyze and Ontology Browser. From here you can search ontologies and see the various classifications of terms in a tree display. The terms which appear in the Trait Search in 2.4 are only variable terms. The ontology browser shows these variables as different from their grouping terms by indicating VARIABLE_OF like in the following screenshot. 2.6 Search Seedlots Seedlots are different from Accessions in that they represent the physical seed being evaluated in an experiment. Seedlots have things like physical storage locations and seed quantities, which accessions do not. To search for available seedlots you go to Manage and then click Seed Lots. By clicking Search Seedlots, you can specify query information. The results from your search will be in the table below the search form. "],["managing-user-roles.html", "Chapter 3 Managing User Roles 3.1 What are User Roles? 3.2 The Manage User Roles page", " Chapter 3 Managing User Roles 3.1 What are User Roles? Every user account in Breedbase has one or more associated roles that determine the authorizations (what the user is allowed to do) in the database. There are three fundamental roles, curator, submitter, and user, which determine basic read/write levels. The curator status can read and write everything in the database. The submitter status can add information and edit or delete previously submitted information. The user type can only read data. Additional roles represent the breeding programs, and are sometimes used to fine-tune write and edit capabilities, as it is necessary for multiple users in a breeding program to edit each other's data. 3.2 The Manage User Roles page In the Manage menu, select the item User Roles. This will show the current users in the database with their associated roles. 
If you are logged in as a curator, the table will show system roles as well as breeding program roles; if you are logged in as a submitter or user, it will show breeding program membership. If logged in as a curator, the roles can be added or deleted. To delete a role, click on the X in the role name. A confirm dialog will be displayed to prevent accidental deletion. To add a role, click on the plus sign next to the roles. A dialog will pop up with a list of roles. Select the desired role and click Submit. The new role should be displayed next to the user immediately. Role deletions and additions will be effective immediately. It is recommended that few users be given the curator privileges to avoid confusion over data ownership and accidental data overwriting and deletion. @ref(managing_user_roles) "],["managing-breeding-programs.html", "Chapter 4 Managing Breeding Programs", " Chapter 4 Managing Breeding Programs New breeding programs can be added by using Add New Program button on the Manage Breeding Programs page. Clicking on the Add New Program button will generate a blank form for you to fill out the name and description of the breeding program that you want to add. After completing the form, click on Add Breeding Program button to finish the process. "],["managing-locations.html", "Chapter 5 Managing Locations", " Chapter 5 Managing Locations Field locations can be managed using the Manage Locations page. On this page, locations in the database are organized based on their breeding programs. Each location has a link to trials conducted in that location. To add a new location, click on the Add Location button that links to the Add New Location form. On the Add New Location form, fill out the location name that you want to add. Latitude, longitude, and altitude are optional. Submit the new location by clicking on the Add Location button at the bottom right of the form. 
"],["managing-accessions.html", "Chapter 6 Managing Accessions 6.1 Add Accessions Using A List 6.2 Uploading Accessions and Accessions Info From A File 6.3 Email alert for accession upload 6.4 Add Parentage (Pedigree) Information to Accessions 6.5 Working with grafts 6.6 Bulk renaming of accessions", " Chapter 6 Managing Accessions The Manage Accession page provides links for adding new accessions. You can choose to add accessions into the database by either using a List you have created or by uploading XLS or XLSX file. Both options will be detailed below. To begin click on the Add Accessions or Upload Accession Info link. This will open a dialog allowing you to select either Using Lists or Uploading a File. 6.1 Add Accessions Using A List First we will show how to add accessions Using Lists. Here you select an accession list which you have previously made. If you need to create or edit your list you can do so now by clicking Manage Lists. Once you have selected your list you can click Continue. The first dialog which can appear will show the accessions which already exist in the database. Click Continue. The next dialog which can appear will show accessions which have very similar matches to the accession names you are adding. In the example below, there are two accession names that are very similar to accession names already in the database. TME0419 is very similar to TME419, and actually is probably a mistake that should not be added to the database. To avoid situations in adding a mistaken duplicate accession, the database gives you options for moving forward with these very similar looking accession names. You can either continue saving the name in your list, replace name in your list with selected existing name, remove name in your list and ignore, or add name in your list as a synonym to selected existing name. Clicking Download Fuzzy Matches will return a tabular result of the fuzzy accession name results shown. Click Make changes and continue to move on. 
The final dialog shows the accessions that will be added. Here you need to assign the species of these accessions. You can optionally group the accessions into a population and/or add an organization for the accessions. Once you click Add Accessions, the new accessions will be created in the database and you will see the following confirmation dialog, which includes links to the newly created accessions. 6.2 Uploading Accessions and Accessions Info From A File The process to upload accessions is very similar to using a list, but enables you to add a variety of properties, such as synonyms, to the accessions in bulk. Clicking on Spreadsheet format will show the following dialog. Here it shows that the file must be XLS or XLSX format and can contain a number of header columns as attributes. It is important that you use exactly the same header column names as listed here. In columns that indicate that many attribute values can be passed at once using (s), such as synonym(s), you can pass a comma separated list of values, such as synonym1,synonym2. Once you have selected your XLS or XLSX file for upload, click Continue. The following process is the same way as with lists: The first dialog which can appear will show accession names which are already in the database. Click Continue and the next dialog that can appear will show fuzzy matches for the accession names you are trying to upload. Here you can choose to prevent adding accession names which look very similar to each other as wrongly duplicated accessions. Click Continue and the final dialog that will appear will show the information to be added into the database. Here it is divided into accession names that are new and accession names that already exist in the database; however, for the accession names that already exist it will show additional attributes that originated from your file that will be added to these accessions. 
Once you click Add Accessions, the new accessions and information will be created in the database and you will see the following confirmation dialog, which includes links to the created and updated accessions. 6.3 Email alert for accession upload When uploading accessions from a file, you have the option to receive email notifications about the status and results of your upload by clicking the Email Alert checkbox. By default, the system will use the email address associated with your account, but you have the option to enter a different email address if you prefer. After submitting, the upload process runs in the background, allowing you to continue using the interface without interruptions. Once the process completes, you will receive an email with the upload results, including any warnings or errors that may have occurred during the upload. 6.4 Add Parentage (Pedigree) Information to Accessions Pedigree data can be uploaded from your computer by clicking on Upload Pedigree File IMPORTANT! Please use only tab-delimited text file format (.xls or .xlsx formats are NOT supported). You can find detailed information on how to prepare pedigree file by clicking on File format information The currently supported format has four tab separated columns: progeny name female parent accession male parent accession type Type can be biparental, self, backcross, sib, polycross, reselected, or open. In the case of the open type, the male parent accession field can remain blank. For all other types, both columns should be filled, even if they contain the same information as another column (such as self). 6.5 Working with grafts Grafts are plants that are composed of a rootstock and a scion, which are genetically different and fused together, usually at the stem level. To work with grafts, the grafts interface needs to be activated by adding a configuration parameter in the sgn_local.conf file. The parameter is show_grafting_interface. 
It should be set to 1 in sgn_local.conf, the default is 0 (in sgn.conf). Grafts to be created need to be specified using an Excel file (xlsx format) with two columns. The first column should have the header scion accession and should list accession names that will be scions. The second column should have the header rootstock accession and should list accession names that will be rootstocks. In the database, the graft accessions will be created as single accessions. The graft accession will have two relationships, one to the scion accession (scion_of relationship) and one to the rootstock (rootstock_of relationship). These relationships are displayed on the pedigree viewer. The graft accession name is created from the scion accession name and the rootstock accession name, separated by the graft separator character. By default, the graft separator character is the plus sign +. The graft separator character can be changed in the sgn_local.conf file, using the parameter graft_separator_string. The graft separator string should not occur in any other accession names that are not grafts. When the grafting interface is activated, a new button will be shown on the manage accessions page, called Upload Grafts. Clicking the button brings up the upload grafts dialog. Select the Excel file containing the grafting information. The system will validate the file, for example, check whether the accessions are in the database, and if the headers are correct. The validation result will be presented, and if problems are found, they will be listed. In addition, if there are problems, the Upload button will be grayed out and upload will not be possible. Conversely, if there are no problems, the Upload button will be activated and can be clicked to upload the data. If the upload completes, a completion message is displayed with a summary of what was uploaded. Grafted accessions can be used like any other accession, for example, they can be used on field layouts. 
If you create a list of graft accessions, use the list type accessions. Note that you shouldn't create new grafts based on other grafts. The scion accession and the rootstock accession have to be different, otherwise they will not be created. 6.6 Bulk renaming of accessions Accessions can be renamed in bulk using the rename accessions feature. To rename accessions, prepare a tab delimited file with two columns: the first column should have the header old name and contain the accession names that need to be changed. The second column should have the header new name and contain the names that the accessions in column 1 should be renamed to. The accession renaming feature is available from the Manage->Accessions page. Click on the Rename Accessions button. The first step is the upload of the file with a verification step. The verification step checks whether all the accession names in column 1 exist in the database, and whether all the accession names given in column 2 do NOT exist in the database. Only if both conditions are met, will the rename button become active, otherwise an error message is displayed listing the offending accession names. Optionally, the old name can be automatically added as a synonym to the renamed accession, using the checkbox on the submit form. This option is clicked by default. Unclick the checkbox to NOT save any old names as synonyms. Note that accession renaming should not be undertaken lightly. This feature is intended for special use cases, such as where accessions are created in a nursery with a name that is different from the accession name in the downstream breeding program. It can also be used to rename accessions in bulk that have spelling mistakes and other issues. Please note however, that the tool does not make any attempt to change the names of associated elements, such as plots, that may have been constructed using accession names. 
Because of the many implications of accession renaming, the feature is limited to accounts with the curator role. "],["managing-seed-lots.html", "Chapter 7 Managing Seed Lots 7.1 Add New Seedlot(s) 7.2 Seedlot Transactions 7.3 Seed Inventory 7.4 Find Seedlots For a List of Accessions 7.5 Create a seedlot for an Accession or Cross 7.6 Add quality data to a seedlot 7.7 Seedlot Maintenance Events 7.8 Deleting Seedlots", " Chapter 7 Managing Seed Lots Seedlots are different from Accessions in that they represent the physical seed being evaluated in an experiment. Seedlots have things like physical storage locations and seed quantities, which accessions do not. The seed in seedlots can be from crosses or can be named accessions. Seedlots from crosses would represent seed harvested. Click Manage and then Seed Lots to begin. 7.1 Add New Seedlot(s) To add a single new seedlot, click on Add Seedlot. This will bring up the following dialog where you enter information about where the seedlot exists, what accession or cross is contained in it, and how many seeds there are. A seedlot must contain either an accession or a cross, and not both. A seedlot must have a weight in grams or a seed count or both of these. In the case where you have many seedlots to add to the database, you can upload an excel XLS or XLSX file instead. Click Upload Seedlots to see the following dialog. 7.2 Seedlot Transactions Seedlots are capable of tracking where seeds came from, such as from crosses, and to where seeds go, such as to plots in the field. If you navigate to a seedlot detail page you will see the following. On this page you see and can edit information regarding a single seedlot, such as its name and location. You will also see a table indicating all the transactions that a seedlot has been involved in, such as if it was planted in a plot in the field. Transactions to field plots are created when adding or uploading a new trial or from a trial's detail page. 
Clicking on Add New Transaction lets you add a transaction between this seedlot and another seedlot. This kind of transaction is useful for representing if you have distributed seed to different locations. 7.3 Seed Inventory To inventory your seed: 1) Make sure your seedlots are in the database. Use Add New Seedlot to add a single seedlot or Upload New Seedlots to add many. 2) Make sure your seedlots are barcoded. You can print these barcodes from the database. 3) Use the Inventory Android Application to scan seedlot barcodes and record weight. Then use Upload Inventory to upload this info into database. If you prefer you can create your own CSV file and upload that, if you do not want to use the Inventory Application. For more info about the Inventory Android Application go to Inventory. Clicking the Upload Inventory button will bring the following dialog: The CSV file that should contain your inventory should meet these Template requirements. The Seed Inventory Android Application exports this exact file. 7.4 Find Seedlots For a List of Accessions A convenient tool for searching available seedlots for a list of accessions is available in the list tool. First open up your list of accessions. For help opening a list of accessions please see the List section help. There is a button called See Available Seedlots. Once you click this, you will see the following table in a dialog. From here you can create a list of seedlots using the checkboxes and the input at the bottom. 7.5 Create a seedlot for an Accession or Cross Complementary to what we saw above for creating seedlots from the Manage Seedlots page, it is possible to create a new seedlot from an accession's detail page or from the cross detail page. On the accession detail page, this is visible in the Related Stocks section as seen below. The cross detail page has an identical section. Notice the link for creating a new seedlot, which streamlines adding the seedlot. 
7.6 Add quality data to a seedlot Quality information can be added to a seedlot in the quality field. This is also available as a column in the file upload format. It is recommended to use a controlled vocabulary, defined by the user, for the quality field. For example, good quality seed should be labelled ok, whereas other quality descriptors could be moldy, insect damage, or low sprouting, etc. 7.7 Seedlot Maintenance Events For some crops, such as sugar kelp, a seedlot requires routine maintenance for the successful long-term storage of the seedlot. (For example, a Seedlot Maintenance Event for sugar kelp would be the routine change of the water that gametophytes are kept in). Breedbase can now store a record of these Seedlot Maintenance Events associated directly with existing Seedlots. Maintenance Events can be uploaded using a simple Excel template or recorded directly on the website. 7.7.1 Setup Each Breedbase instance needs to be configured to support the storage of Seedlot Maintenance Events since each crop will have their own distinct set of maintenance events for their seedlots. To check if your Breedbase instance supports this feature, go to the Manage menu and select the Seed Lots page. Make sure you are logged in and look for the Seedlot Maintenance button near the top, next to the Create Seedlot(s) and Upload Inventory buttons. If you don't see this button, contact the developer(s) supporting your Breedbase instance and ask if they can set up this feature. 
The location of the Seedlot Maintenance button on the Manage > Seed Lots page 7.7.2 Adding Events Seedlot Maintenance Events can be added using two methods: 1) Uploading an Excel template or 2) Recording events directly on the website Uploading Events with Excel Template To bulk-upload a file of Seedlot Maintenance Events, first create an Excel (.xls or .xlsx) file with the following headers: seedlot - the name of the Seedlot to associate the event with (must exactly match an existing Seedlot in the database) type - the name of the Seedlot Maintenance Event type (these vary between Breedbase instances, a list of supported event types is displayed on the upload page) value - the value of the Seedlot Maintenance Event (these may be different for each event type and vary between Breedbase instances, a list of supported event values is displayed on the upload page) notes - optional, additional notes/comments about the event operator - the username of the Breedbase user that recorded the event timestamp - the date/time the event was recorded, in YYYY-MM-DD HH:MM:SS format Once you have an Excel file with the events filled out, follow these steps to upload the events to the database: Make sure you are logged in to your Breedbase instance Go to the Manage > Seed Lots page Select the Seedlot Maintenance button Select the Upload Maintenance button Choose your Excel (.xls or .xlsx) file to upload Select the Upload button The Seedlot Maintenance upload dialog, showing the supported event types and values (for sugar kelp) Recording Events on Website To add individual Seedlot Maintenance Events to the database in real time, as theyre being recorded, use the Record Maintenance page. Follow these steps to record Seedlot Maintenance Events: Make sure you are logged in to your Breedbase instance Go to the Manage > Seed Lots page Select the Seedlot Maintenance button Select the Record Maintenance button Enter the Seedlot Name or scan a barcode that has the Seedlot Name encoded. 
Once entered, the box at the top of the page will display basic information about the Seedlot as well as its recently recorded events. Select or Enter the values of individual events Optionally, select the notes button next to each event to add additional notes/comments about that specific event Make sure the operator/username and timestamp are correct Select the Submit button to add the recorded events to the database. NOTE: any events that remain selected as Not Recorded will not be submitted to the database. The Seedlot Maintenance record page, as configured for sugar kelp 7.7.3 Displaying Events Recently recorded Seedlot Maintenance Events are displayed in a table from the main Seedlot Maintenance page, as well as the detail page for individual Seedlots. Unfiltered table of recent Seedlot Maintenance events The events displayed in these tables are sorted by timestamp, with the most recently recorded events displayed first. The displayed events can be filtered using any number of supported filter criteria, such as: - seedlot names (as entered on the page or using an existing seedlot list), - dates (on, on or before, before, on or after, and/or after the entered dates) - event types - event type values - operator/username Select the properties of the filter(s) you want to apply, then select the Add button next to the button to add the filter to the list of applied filters. Once you're done adding filters, select the Filter button to search the database for the filtered events. A filtered table of Seedlot Maintenance events The filtered events can be downloaded directly from the table using the Excel or CSV buttons at the top of the table. Or Seedlot Maintenance Events can be bulk-downloaded (this includes all events for a Seedlot) using a list of Seedlots from the main downloads page (see below). 7.7.4 Downloading Events To bulk-download all events for a specific subset of Seedlots: Create a list containing the Seedlots you are interested in. 
Go to the Download Using Lists page (Manage > Download) Find the Download Seedlot Maintenance Events section Select your list of Seedlots Select the Download button to generate the download file The downloaded file will follow the same format as the upload template and will contain all recorded Seedlot Maintenance Events for each Seedlot in the list. 7.8 Deleting Seedlots Seedlots can be deleted on the Manage Seedlots page (/breeders/seedlots) by searching for the seedlot and then clicking the X to delete one seedlot at a time. To delete a seedlot, the logged in user needs the required delete privileges on the seedlot. The seedlot also should not have any transactions associated with it (except for the initial transaction). To delete seedlots in bulk, generate a list of type seedlot, for example, using the wizard. Open the section Delete seedlots using a list on the Manage Seedlots page and select the list. Seedlot deletion using a list is only available to users with curator status. "],["managing-populations.html", "Chapter 8 Managing Populations", " Chapter 8 Managing Populations Populations are modeled as groups of accessions. This grouping can be useful in downstream analyses. To manage these populations go to Manage Accessions and scroll to the bottom. To add a new population click Create Population. The following dialog will appear where you choose a list of accessions and give a name to the new population. Please note it is also possible to create a population when you are uploading new accessions into the database. Click on the plus (+) button next to Populations to see all the available populations. Click on a population name to see the accessions in the population. From here you can delete accessions from a population as well as add new accessions to the population. 
"],["managing-crosses.html", "Chapter 9 Managing Crosses 9.1 Crossing Experiment 9.2 Cross 9.3 Cross Wishlist 9.4 Crossing Experiment Detail Page 9.5 Cross Detail Page", " Chapter 9 Managing Crosses Information for crosses can be managed using the Crosses option in the Manage menu. 9.1 Crossing Experiment Different crosses in the same trial/nursery/project are grouped via crossing experiment. Crossing experiments are organized based on their breeding programs. To find a crossing experiment, you can either type the crossing experiment name in the Search box, or look for the crossing experiment directly in its breeding program by clicking on the + icon. In each breeding program, crossing experiments can be placed directly in the breeding program, or organized in folders. The Folders section allows you to place crossing experiments in folders, move a crossing experiment in a folder to another folder, or rearrange your folders within a breeding program. 9.1.1 Add New Crossing Experiment To add a new crossing experiment, click on Add Crossing Experiment link. Required Information: Crossing Experiment Name: enter a name for the crossing experiment. The crossing experiment name must not already exist in the database. Breeding program: select a breeding program that is available in the database. New breeding programs can be added on the Breeding program page, accessible from the Manage menu. Breeding Program Page Location: select a location for the crossing experiment. New locations can be entered on the Locations page, accessible from the Manage menu. Location Page Year: select a year. Description: enter a description for the crossing experiment. After filling in the information, click Submit to generate the crossing experiment. 9.2 Cross 9.2.1 Add New Crosses Add a cross by using the Add New Cross dialog To add a single new cross, click on Add Cross link. Enter cross information in the popup dialog. 
Required Information: Crossing experiment: select a crossing experiment available in the database. Location: select a location available in the database. Cross name: enter a name for the cross. The cross name must not already exist in the database. Cross type: the options for cross types are: biparental, self, open pollinated, bulk, bulk selfed, bulk and open pollinated, double haploid, polycross, reciprocal and multicross. The Female Parent and Male Parent field are auto-complete fields for accessions that are already in the database. The parents specified will be entered in the pedigree of the new accessions generated by this cross. Optional Information: Female Plot and/or Male Plot: In addition to the accession names, specific plots used in the cross can also be added to the database. To retrieve plot names associated with each female/male accession, enter your trial name, then click Search Plots. Plot names of each parental accession in that field trial will be shown in the drop-down list, you can then select the plot used in the cross. Additional crossing experimental information such as pollination date, number of flowers, number of fruits, number of seeds can be specified during adding new cross. Alternatively, this information can be updated or edited directly on the Cross Details page. If you know the number of accessions that are generated from the cross, they can be instantiated immediately in the database by clicking the Add accessions for progeny checkbox and specifying the number. Click Submit to generate the cross. Upload New Crosses To upload new crosses from an Excel file (.xls or .xlsx), click on Upload Crosses link. Select a crossing experiment and a location available in the database from drop-down lists and choose a file that you want to upload, then click Upload File. Please check spreadsheet format carefully. The file must be an Excel file (.xls or .xlsx). 
9.2.2 Update Crosses by Uploading To upload progenies and/or experimental info of crosses already in the database, go to Manage-Upload page. In the Crosses section, there are links for uploading progenies and experimental info. Please check spreadsheet format in each link carefully. The file must be an Excel file (.xls or .xlsx). Note: crossing experimental information is customized based on the need for each crop. As a result, column headers for experimental info in your database may be different from the information shown in this manual. 9.3 Cross Wishlist An Android ODK application is being developed to record cross information on a mobile device in the field. To link this mobile application with the database, the Cross Wishlist can be used to create a plan for which crosses to perform. This tool is available on the Manage Cross page. It is currently only available on certain databases, so when you click this link you may see an alert mentioning that the cross wishlist is not available on your database. 9.3.1 Create a Cross Wishlist Step 1. Select the accessions to be crossed in your trial There are two interfaces for this step, either Not Using Lists or Using Lists. Depending on if you already have a list of female and male accessions to use, you can decide on which interface to use. The end result of using either interface is the same. We will start by showing Not Using Lists. First select the trial in which the crosses are to be performed. This will populate a select box with all the accessions used in that trial. From here, one or many accessions can be selected as the female accession. Once the female accessions are selected, a table is populated. Each row in this table begins with the female accession that was selected, followed by a select box with all the accessions used in the trial. From here, one or many accessions can be selected as the male to use in the cross. 
Once the male accessions are selected to cross with each female accession, a table indicating priorities appears. Priority is meant to indicate an order in which to attempt the cross; first the highest priority male will be considered, but if this cross is not possible then subsequent males will be considered. An equal priority can be given and this will not indicate a specific order to follow. Alternatively, we could have used the Using List interface instead. Here we select the trial in which the crosses will be performed and we provide a list of accessions to consider for the females and the males to be crossed. Step 2. Select the female plots to be considered in the crosses After selecting your lists, the table below is populated. The first column has all the female accessions specified and the header row has all the male accessions specified. The males to consider crossing with each female are indicated with priority. After female and male accessions are selected to cross, either by the Not Using List or Using List interface, click Next. The next dialog will allow selection of specific female plots to use for the cross. Sections for each female accession selected will appear with the field layout displayed. Selecting all plots in which the female is present indicates that the cross should be performed on all plots where that female accession is present. Step 3. Transfer the cross wishlist to your mobile crossing application Clicking Push Cross Wishlist for ODK Use will send the cross wishlist plan to the ONA server for use by the mobile ODK application. Crosses can then be performed and recorded in the field using the mobile application. Afterwards, the crosses are sent back to our database and stored. 9.4 Crossing Experiment Detail Page Information for crosses in the same crossing experiment is compiled in the crossing experiment detail page. 
Each cross name, female parent, male parent, female plot and male plot has a link to its own detail page, which contains information specific to each one. Note: crossing experimental information is customized based on the need for each crop. As a result, the details of the information in your database may be different from the information shown in this manual. 9.5 Cross Detail Page Information of each cross can also be viewed in its detail page. This page allows you to update or edit crossing experimental information and add progenies related to that cross. Note: crossing experimental information is customized based on the need for each crop. As a result, the details of the information in your database may be different from the information shown in this manual. "],["managing-field-trials.html", "Chapter 10 Managing Field Trials 10.1 Trial Detail Page 10.2 Adding Trials 10.3 Updating Trial Data 10.4 Deleting Trial Data", " Chapter 10 Managing Field Trials To view trial details on the database, click on the Field Trials link under the manage menu on the toolbar. Clicking on the Field Trials link will bring you to the Manage Trials page. On this page, trials are organized according to their breeding programs. To access trial details, click on the + icon next to your breeding program. Trials can be placed directly in their breeding program. Alternatively, they can be organized by using folders within each breeding program. Clicking on trial name will take you directly to the trial details page. 10.1 Trial Detail Page Trial detail page displays important information about individual trial including breeding program, location, year, description of the trial, design, and any files associated with that trial. The Navigator section on the trial detail page allows easy access to all aspects of your trial. 
This section contains subsections for printing labels for your plots or plants, recording phenotypes, viewing your trial layout or design, viewing phenotypes for this trial, or conducting analyses. The transplanting date field feature will only be shown if it has a value. To add a transplanting date after creating a trial, change the show_transplanting_date parameter from 0 to 1 in the SGN config file. As a result, you will be able to add a date under the transplanting date field by clicking the Edit Trial Details on the trial detail page. 10.2 Adding Trials Only users with the account status of submitter may create trials. To learn how to change your account status from user to submitter visit the 1.2 page. 10.2.1 Prerequisites To add a trial, all of your accessions should already exist in the database before you begin to design a trial. If you have accessions that are not in the database, see the instructions for adding accessions . Breeding program and location for your trial should also exist in the database. If you need to add breeding program and/or location to the database, see instructions for adding breeding program and location in the Managing Breeding Programs and Managing locations respectively. On the Manage Trials page, there are three alternative methods for you to add new trials: by using Add Trial form, Upload Trial form, or Add Multi-location Trial form. 10.2.2 Adding a trial by using Add Trial form Step 1. Begin the Design new trial workflow Click on Design New Trial to begin. The first step in this workflow is an introduction that looks like: Here it gives information about what is required for a trial, including that to create a new trial, you need to create a list of the accessions that you would like to use in the trial. Lists can be viewed, created, and modified with the lists tool at the upper right of the screen. For more information on lists, click here. Step 2. 
Enter Trial Information On this screen you need to enter basic information about the trial, such as breeding program and location(s). You must also select a design type, such as Complete Block Design. The design is important because it influences how your genotypes are distributed and randomized over the trial. You must first click validate before proceeding to the next step. Step 3. Enter Design Information On this screen you need to specify a list of accessions to use in the experiment. This list must be a valid list of accessions. You must also specify all required design information, such as number of blocks in this case. Step 4. Enter Field Map Information (Optional) On this screen you can specify how the row and column numbers will be generated for the plots in the trial. The row and column number represent a relative position of the plot in the field. If you are not exactly sure of how you will plant the plots in the field or you have an irregular (non-rectangular) layout, you can skip this step for now. This information can be added on the Trial Detail Page once the trial is saved in the database in order to reflect exactly how the plots were planted in the field. Step 5. Custom Plot Naming (Optional) On this screen it is possible to change the format in which plot names will be generated for your trial. It is recommended to skip this step and just use the format generated by the database by default. Step 6. Review Designed Trial On this screen you can review the trial that the database has generated. You will see a graphical representation of the trial. The numbers on the squares represent the plot_number of each plot and on mouse hover you can see further information about the plot. You will also see a table representation of all the plots and their information. If you want to redo the randomization, you can click the Redo Randomization button. At the bottom there is a brief summary of the trial followed by two buttons. Step 7. 
Add Field Management Factors to your design (Optional) You can add Field Management Factors by clicking Add Field Management Factor(s) to Design. Clicking this opens a dialog to name your factor. You can name this to account for fertilizer or watering regime or inoculation or anything else. This is optional and can be added from the trial detail page afterwards. Click Continue and a dialog will appear where you can specify plots for which the factor was applied. There is a select all button also. Step 8. Saving new trial in the database Once you are done reviewing the trial you can click Confirm to save the generated trial into the database. Once the trial has saved you will see the final completion screen: 10.2.3 Adding a trial from an uploaded file If you already have trial design layout in a spreadsheet, you can add your trial into the database by using Upload Trial form. To access Upload Trial form, click on Upload Existing Trial(s) button on the Manage Trials page. When you click Upload Existing Trial(s) you will see the following workflow. Notice that there are 5 numbered sections to the workflow. Step 1: The first step is to understand what the format of the trial upload is. It is important to understand that the field layout represents plots in the experiment. Each plot has a globally unique plot_name, a sequential plot_number that is unique in the trial (but not globally unique, e.g. 101, 102, 103 for three separate plots), an accession_name representing what genotype is planted in that plot, and a block_number representing design replication. Each plot can be thought of as having a row_number and a column_number representing the relative position of the plot in a grid (e.g. the top left plot is row 1 column 1 followed by row 1 column 2). 
Each plot can be planted with an amount of seed from a seedlot, where the seedlot_name represents the specific seed packet that was used, and num_seed_per_plot and weight_gram_seed_per_plot represent amount that were transferred from the seedlot_name to the plot_name. Treatments (management factors) can be applied onto plots using additional column names in your file, where a 1 represents if the factor was applied to the plot and an empty cell means it was not applied. This information and more can be found by clicking Information about file format, which shows the following: Minimum File requirements All accession names in the file must exist in the database. See adding accessions for more information. The uploaded file should be XLS or XLSX file format (NOT CSV). The first row (header) must contain the column names: plot_name accession_name plot_number block_number is_a_control rep_number range_number row_number col_number seedlot_name num_seed_per_plot weight_gram_seed_per_plot Minimal Example: plot_name accession_name plot_number block_number is_a_control rep_number range_number row_number col_number seedlot_name num_seed_per_plot weight_gram_seed_per_plot 2018plot1 my_accession1 101 1 1 2018plot2 my_accession2 201 2 2018plot3 my_accession2 102 1 2018plot4 my_accession1 202 2 1 File validation In case of errors in the uploaded file such as missing or invalid data, a window will appear listing the specific errors in the file that must be corrected before a successful upload. Uploading a trial with Field Management Factors You can upload a trial with field management factor(s) by adding additional column(s). The column header will be the factor e.g.fertilizer, watering regime, inoculation, etc. and the values in these columns will be either 1 or empty, indicating that the factor was applied to the plot or not. Step 2: Once you feel that your experiment field layout is in the right format, click on to the Next Step. 
You will see the following form which must be filled in completely: The trial name must be globally unique in the database. Please try to follow standard naming conventions for your group. First you need to validate the form, and then you can click Upload Trial. Step 3: In the case where you have uploaded an experiment using accession_names that are not already present in the database, you will be taken to this screen. If the accession_names in your file are all already in the database, this step will be skipped. The reason it is necessary for your accessions to be in the database before you can add a trial using them is that a single accession can be used among many trials and therefore must exist as a separate entity in the database; because of this it is also very important to be careful about adding wrongly duplicated accession_names into the database. From this screen it is possible to make a new list with the missing accession_names and then click Add Accessions to the database to immediately resolve the issue. Once all your accessions are in the database, click to move to the Next Step. Step 4: In the case where you have uploaded an experiment using seedlot_names that are not already present in the database, you will be taken to this screen. If the seedlots in your file are all already in the database, this step will be skipped. The reason it is necessary for your seedlots to be in the database before you can add a trial using them is that a single seedlot can be used among many trials and therefore must exist as a separate entity in the database. From this screen it is possible to add the missing seedlots; you can either upload an XLS or XLSX file to add many at once or you can add them one by one. Once all your seedlots are in the database, click to move to the Next Step. 
Step 5: If there are any other errors with your file, such as if the plot_names are not globally unique in the database or your plot_numbers are not unique in your trial or row_number is not an integer or any other error, you will see the errors listed in the red box. It is up to you to correct these errors in your file. Simply open up the file you selected earlier in Excel and correct the issues and then save the file. Then you can click Submit Trial and it will resubmit it for you. You can continue to edit your file here and submit as many times as you need until it is accepted. Completion screen Whether you were lucky enough to submit your trial successfully on Step 2 or if you tried many times on Step 5, once your trial has been saved in the database you will see the following screen: 10.2.4 Multi-location trials To add multi-location trials, simply select the multiple locations while using the Add Trial form. This will create a separate trial for each selected location, but they will share the same design and will be grouped in a single folder. By default each trial design will have a fresh randomization, but if desired you may check the Use same randomization for all locations option. 10.2.5 Email alert for multiple trial design upload When uploading multiple trials from a file, you have the option to receive email notifications by clicking the Email Alert checkbox. By default, the system will use the email address associated with your account, but you have the option to enter a different email address if you prefer. After submitting, the upload process runs in the background, allowing you to continue using the interface without interruptions. Once the process completes, you will receive an email with the upload results. 10.2.6 Viewing Plot Layout and Trait HeatMap 10.2.6.1 Viewing plot layout In the Field Layout Tools and Phenotype Heatmap section of a Trial Detail page, the trial physical layout is displayed by default. 
The relative position of the plots will be displayed based on the row and column positions given to the plots during the trial creation or upload steps. The plots are color-coded based on the plots rep and block numbers and whether or not it is used as a check. Hover the mouse over the plot to see details about a specific plot. If there is more than one trial grown in the same physical field, the trial layouts of all of the trials can be shown together if the trials share these properties: Each trial has the same year Each trial has the same location The location type of the trials location is set to Field The row and column positions of all of the plots (across the related trials) dont overlap. For example, trial #1 starts at row 1 and trial #2 starts at row 10. When these conditions are met and you check the Select Trials in Same Field checkbox, the plots from all of the related trials will be displayed on the same field layout. The plots will be color-coded by trial. The planting order and harvest order downloads will include the plots from all of the displayed trials in the order in which the plots occur in the field. 10.2.6.2 Viewing plot layout for multiple trials Tracking plot images on fieldMap Plot images can be seen on fieldMap if a plot is associated to any image. To view plot image(s), click on a plot, a dialog will appear. On the appeared dialog, click on View plot images. To see more images if a plot has more than 2 images, click on See more images Medium size of an image can be viewed by clicking on an image. Viewing assayed trait heatmap Phenotype heatmap can be viewed by selecting a specific assayed trait from the selectbox drop-down. Mousing over the plots, highlights the plot in green and also displays the plots field information including the selected traits phenotype value. Suppressing Plot Phenotype Clicking on a plot on the heatmap would display a dialog that has a button for suppressing a plot phenotype value for a given trait. 
A suppressed plot value can be excluded during trial analysis and phenotype download. 10.2.7 Adding additional information in the Trial Detail page After you added a new trial to the database, you can edit trial details or add more information for that trial through the Trial Detail page. Uploading Physical Trial Layout You can upload physical trial layout by clicking on the Upload trial coordinates button on the Trial Detail page. Please check file format carefully. You can find file format information by clicking on the Spreadsheet format on the Upload trial coordinates window. Spreadsheet format: Physical Trial Layout File requirements All plot names in the file must exist in the database. The uploaded file should be tab delimited (txt). The first row (header) must contain the column names Example: plot_name row_number col_number plot1 1 1 plot2 1 2 plot3 1 3 Select the trial layout coordinates file that you want to upload for this trial, then click OK button to upload the file. The following message is displayed after the coordinates are uploaded. The field layout can be viewed by clicking on the Trial Heatmap Section to see a drop-down of the field map. Downloading Field Map Spreadsheet Field map spreadsheet can be downloaded if the trial has field coordinate (row and column numbers) uploaded for its plots. To download, click on the Download FieldMap Layout link on the Trial Heatmap section. A dialog will appear, click on the submit button to download. Click to view downloaded spreadsheet. Editing Physical Trial Layout Usage Help link contains information on how to edit physical trial layout. There are three different options for editing trial layout: Replacing plot accession by clicking on the plot in the layout. Replacing trial accession by using Edit Field Map link. Substituting plot accessions by using Edit Field Map link. When you move a cursor over a plot on the trial layout, information for that plot appears. 
To edit a specific plot, clicking on that plot. Entering new accession on the Replace Plot Accession form, then clicking on Replace Plot Accession button. To replace an accession (in every plot/plant of that accession), clicking on Edit Field Map button. On the Edit Field Map window, clicking on Replace Accession button. Selecting any accession that you want to replace and entering your new accession, then clicking Replace Trial Accession button. You can switch plot accessions between any two plots by clicking on Substitute Accession button. On the Substitute Plot Accession form, selecting the two plots that you want to switch, then clicking on the Substitute Plot Accession button. 10.2.8 Downloading the Trial Layout from the Trial Detail page Click on Download Layout on the Trial Detail page. The trial layout includes all information regarding the observation units in the experiment. The observation units can be plots, plants, or subplots. The trial layout can include trial design information such as the block_number and rep_number. It can also include physical map information such as the row_number and col_number, if that information is available for the trial. The trial layout also includes information regarding treatments that have been applied in the field. Optionally, the layout can give information regarding accessions global performance for a list of traits. 10.2.9 Adding Plant Entries To Your Trial After you added a new trial to the database you can choose to add plant entries to your trial. Adding plant entries enables plant level phenotyping. It is generally better to enter data at the plant level into the database because it is always possible to calculate plot level phenotypes from the individual plant data. Plant entries can be added to your trial in two ways: 1) Automatically generated by the database. The only input required is the number of plants per plot. 2) Uploaded in an XLS or XLSX file. This allows you to specifically name your plant entries. 
These two options are available in the Plant Entries section on the Trial Detail Page, as shown in the screen shot below. Automatically Generate Plant Entries Clicking on Add plant entries opens the following dialog box. The only input required is the number of plants per plot. This will create plant entries that are named as a concatenation of the plot_name and the plants index number e.g.plot_name_plant_1 Upload Plant Entries Alternatively, you can choose to upload an XLS or XLSX file that contains the names of the plant entries. Clicking on Upload plant entries opens the following dialog box. Clicking on Spreadsheet format will give you information about the XLS or XLSX file to upload. Clicking this will open the following dialog box. This shows you that the file requires the header to contain plot_name and plant_name. The plot_name must exist in the database already and the plant_name must be unique in the database. Along with the file, you must specify number of plants per plot. This is intended to be the total number of plants that were planted. If the file you upload shows three plants in one plot and four plants in another plot, that is fine. 10.2.10 Adding Tissue Sample Entries To Your Trial Some trials require tissue samples to be collected from plants in a field trial. The database will generate these tissue sample identifiers for you and will maintain all relationships with the plant, plot, accession, etc. To begin, go to the Design section of a trials detail page and open the tissue sample entries section. Please note that tissue samples are directly related to plants, therefore your trial requires plants before you can add tissue samples. When you click on Add tissue sample entries you will see a dialog where you specify the number of tissue samples you require per plant. Once you have specified how many tissue samples, you can give specific words to distinguish samples, such as root or stem, as seen below. 
Once you have added tissue sample entries they will appear in the design section of the trial as seen below. Each tissue sample has a detail page where you can add information about the sample, such as if it is in transit or in storage somewhere. The related stocks section near the bottom of this detail page displays the relationships between all stocks, including tissue samples. 10.2.11 Uploading GPS Coordinates For Plots You can upload GPS coordinates for the plots in your trial. There is a link on the Trial Detail Page as shown below. Clicking on this link will bring up the following dialog. Here you can upload an XLS or XLSX file. To see information on the format of the file that should be uploaded, click on Spreadsheet format. This will bring up the following dialog. This dialog tells you that the file must be XLS or XLSX and must contain: plot_name WGS84_bottom_left_x WGS84_bottom_left_y WGS84_bottom_right_x WGS84_bottom_right_y WGS84_top_right_x WGS84_top_right_y WGS84_top_left_x WGS84_top_left_y The GPS coordinates should be WGS84 format and specify a four-pointed polygon around the plot. 10.2.12 Uploading Additional Files To Trial It may be of interest to you to upload additional documents, images, or recordings to your trial. To do this, scroll down to the Uploaded Additional File section on the trial detail page. From here you can view and download any of these additional files. To upload an additional file, click on the Upload Additional Files link. A dialog will appear where you simply select your desired file. For information, you can click Upload information to see the following message. 10.3 Updating Trial Data To update the trial-level metadata (such as the planting date, design type, description, etc) of one or more existing trials, click the Update Existing Trial(s) button from the Manage > Field Trials page. This upload can also be used to rename trials or move trials to a different breeding program. 
In order to update a trial, you must be a curator or a submitter (that is associated with the breeding program of the trials). Here you can upload a file that contains the new metadata for the existing trials in the database. The first column is labeled trial_name and includes the name of the existing trial. Additional columns can be included for the metadata you want to update. Any columns not included in the file or values left blank will leave the existing metadata unchanged. The columns that can be included are: new_trial_name: A new name for the trial, must not already exist in the database breeding_program: The name of breeding program that managed the trial, must exist in the database. location: The name or abbreviation of the location where the trial was held, must exist in the database. year: The year the trial was held. transplanting_date: The transplanting_date of the trial was conducted. Date in YYYY-MM-DD format or remove to remove the date planting_date: Date of Planting in YYYY-MM-DD format or remove to remove the date harvest_date: Date of Harvest in YYYY-MM-DD format or remove to remove the date design_type: The shorthand for the design type, must exist in the database. Possible values include CRD: Completely Randomized Design, RCBD: Randomized Complete Block Design, RRC: Resolvable Row-Column, DRRC: Doubly-Resolvable Row-Column, ARC: Augmented Row-Column, Alpha: Alpha Lattice Design, Lattice: Lattice Design, Augmented: Augmented Design, MAD: Modified Augmented Design, greenhouse: undesigned Nursery/Greenhouse, splitplot: Split Plot, p-rep: Partially Replicated, Westcott: Westcott Design description: Additional text with any other relevant information about the trial. trial_type: The name of the trial type, must exist in the database. 
Possible values include Seedling Nursery, phenotyping_trial, Advanced Yield Trial, Preliminary Yield Trial, Uniform Yield Trial, Variety Release Trial, Clonal Evaluation, genetic_gain_trial, storage_trial, heterosis_trial, health_status_trial, grafting_trial, Screen House, Seed Multiplication, crossing_block_trial, Specialty Trial plot_width: plot width in meters plot_length: plot length in meters field_size: field size in hectares 10.4 Deleting Trial Data To delete a trial data, click on the Delete trial data section. There are links to delete traits, layout and trial entry data. To delete assayed trait data, click on Delete trait data link. On the appeared dialog, confirm deletion by clicking on the Select Traits For Deletion button, then select one or more traits to delete from the trial. To delete trial layout data, click on the Delete layout data link. Confirm deletion on the appeared dialog. To Delete trial entry, click on Delete trial entry link. Confirm deletion on the appeared dialog. "],["managing-genotyping-plates.html", "Chapter 11 Managing Genotyping Plates 11.1 Adding a New Genotyping Plate 11.2 Genotyping Plate Detail Page", " Chapter 11 Managing Genotyping Plates Genotyping Plates represent the content of a genotyping plate sent to a genotyping facility (e.g.samples in specific wells). To streamline this process, it is possible to upload this information or let the database create a plate for you. Once the genotyping plate is saved in the database it is then possible to export the information directly to genotyping facilities that are BrAPI compliant. The genotyping facility can then provide status information to us via BrAPI. To begin go to Manage->Genotyping Plates. Here the genotyping plates are divided by Breeding Program. These sections can be expanded by clicking on one. 11.1 Adding a New Genotyping Plate To begin, click on Add Genotyping Plate. 
Notice that this form is split into three sections: Plate Information, Well Information, and Confirm. The first section is for defining information about the genotyping plate, such as a Plate identifier, plate format (96 well), etc. The second section is for defining the samples in the wells, such as sample names, sample concentrations, well position, etc. The final section is for Submitting the info. All fields in the Plate Information section are required. In the Well Information section you can choose to either 1) Upload an XLS or XLSX spreadsheet with your sample layout or 2) let the database create the sample layout. If you choose to upload an XLS or XLSX spreadsheet, the Spreadsheet Template info requires the following: In either case, the sample identifier is generally a concatenation of Plate name and well position, e.g.MyGenotypingTrial1_A01. In either case, you need to provide a source_observation_unit_name for each sample. This can be a tissue sample name, a plant name, a plot name, or an accession name; however, in any case, the identifier must already exist in the database. This allows us to link the sample in the well to specific field trial plots, or, plants, or tissue_samples. If you only know which accession is in the well, you can use the accession name. In the final Confirm section you can decide whether to submit this information to the genotyping facility you selected. This requires that the genotyping facility is BrAPI compliant to work. 11.2 Genotyping Plate Detail Page If you open a specific genotyping plate, it will take you to the detail page. Here you can see the Accessions used in the plate (if you created the trial and the source_observation_unit_names you used were plots, this will still work because we know the accession of the plot or plant or tissue sample). Further down you can see a graphical representation of your plate with well positions. This can be 96 well or 384 well depending on your plate format. 
"],["using-fieldbook-app.html", "Chapter 12 Using Field Book App 12.1 A typical workflow 12.2 Creating Field Layout Files for the Field Book App 12.3 Creating Trait Files for the Field Book App 12.4 Transferring Files from Your Computer to Android Tablet 12.5 Setting up Field Book App for data collection 12.6 Exporting Files from Field Book App 12.7 Uploading Phenotype Files to an SGN database", " Chapter 12 Using Field Book App SGN databases support the Android Field Book App for collecting phenotypic data in the field with tablet computers. The app is available here: https://play.google.com/store/apps/details?id=com.fieldbook.tracker The app can also be downloaded directly from the Google Play store. There is no charge for the app. Field Book App requires two files for collecting data: Field layout file and trait file. SGN databases can generate the field layout file and trait file, which can be downloaded onto your computer, then transferred to an Android tablet device. 12.1 A typical workflow Creating a field layout file based on the design of field trial Creating a trait file from the list of traits Downloading the field layout file and trait file from the database to your computer Downloading the field layout file and trait file to the tablet (where the Field Book App is installed) Collecting phenotypes Exporting phenotypes from Field Book App to your computer Uploading the exported phenotype file from your computer to the database 12.2 Creating Field Layout Files for the Field Book App There are two alternative methods for creating Field Layout Files. Using Field Book Tools page Using Trial Detail page. 12.2.1 Creating Field Layout Files by using Field Book Tools page. To access Field Book Tools page, clicking on Field Book App in the Manage menu. On the Field Book Tools page, clicking on New On the Download Fieldbook window, selecting trial name and data level (plots or plants), then clicking on Submit button. 
A treatment can be selected, which allows you to record phenotypes based on treatment application. A list of traits can be selected, which provides a summary of an accessions global performance for those traits in the Fieldbook. If the field book layout file was successfully created, a pop-up window will indicate that the field book layout file was saved successfully. Clicking on the file name will immediately download the file onto your computer. The file is also available to download on the Field Book Tools page, if you need to re-download it. To download field layout file to your computer, clicking on Download File, the file can then be transferred to your tablet. If you no longer want to keep the field layout file, clicking on Delete Layout File. 12.2.2 Creating Field Layout Files by using Trial Detail page. To create Field Layout Files, go to the Trial Detail page of the trial that you want to create the file. On the Trial Detail page, scrolling down to the bottom of the page to find Android Field Book Layout in the Files section, then clicking on the Create Field Book link. Clicking on the Create Field Book link will open a new window showing the name of the trial that you selected, as well as data level (plots or plants). A treatment can be selected, which allows you to record phenotypes based on treatment application. A list of traits can be selected, which provides a summary of an accessions global performance for those traits in the Fieldbook. To proceed, clicking on Submit button. If the field book layout file was successfully created, a pop-up window will indicate that the field book layout file was saved successfully. Clicking on the file name will immediately download the file onto your computer. The file is also available to download on the Field Book Tools page, if you need to re-download it. To download field layout file to your computer, clicking on Download File, the file can then be transferred to your tablet. 
If you no longer want to keep the field layout file, clicking on Delete Layout File. 12.3 Creating Trait Files for the Field Book App Steps to Create a Trait File: 12.3.1 Creating a Trait List After you logged in, lists can be created and managed using the Search Wizard or the Lists link. For more information on how to create lists, click here. 12.3.2 Creating a Trait File After you have your trait list, clicking on the Field Book App link found under the Manage menu tab. This will take you to the Field Book Tools page. To create a new trait file, finding the heading Trait Files, then clicking on the New link. Clicking on the New link will open a dialogue box titled Create Trait File. Please enter your Trait file name and select List of traits to include from drop-down list that you previously created. You can only use traits included in the list. Check the box titled Include Notes Trait if you would also like to record and upload general plot notes in the field. Click OK to submit. If your trait file was successfully created, a new window will indicate that the trait file was saved, then clicking on Close. After the trait file was saved, you will see your file listed in the Field Book Tools page. Clicking on Download link to download the trait file to your computer. After downloading the trait file to your computer, the file can be transferred to an Android Tablet. You need the Android Field Book App to open the file. The Android Field Book App can be downloaded at: http://www.wheatgenetics.org/bioinformatics/22-android-field-book 12.4 Transferring Files from Your Computer to Android Tablet 12.4.1 Files on your computer After downloading, Field Layout files and Trait files can be found in the Downloads folder of your computer. Field Layout files on your computer will have a prefix fieldbook_layout_ added to the beginning of the file name. 
For example: 2014-01-28_19:14:34_Trial Demo_location 6767.xls on the database website will be saved as field_book_layout_2014-01-28_19:14:34_Trial Demo_location 6767.xls on your computer. The files can be transferred to Android tablet by copying the files into the tablets Internal Storage File. 12.4.2 Files on your Android tablet To transfer Field Layout file and Trait file to your Android tablet, connecting an Android tablet to your computer, then clicking on tablet icon on your computer. Clicking on the tablet icon will open a window showing an Internal Storage file. After you installed the Android Field Book App, all files for the app are stored in the fieldBook folder within the Internal storage folder. Within the fieldBook folder, there are five sub-folders: field_export field_import plot_data resources trait Field Layout files must be copied into the field_import folder. Trait files must be copied into the trait folder. You can either drag and drop, or copy the Field Layout file and the Trait file from your computer to the folders in your Android tablet. 12.5 Setting up Field Book App for data collection After you transferred the Field Layout file and Trait file from your computer to Android tablet, you still need to set up Field Book App on your tablet for data collection. To set up the Field Book App: To open the Field Book App in the Android Tablet, clicking on the Field Book App icon, which is a green rectangle. To import Field Layout files, clicking on the Fields section of the main menu of the Field Book App. Clicking on the Fields tab will open a new dialogue that will let you select the file that you want to import. Choosing a Field File will generate a new dialogue that will ask you to choose between an Excel or CSV format. Since the data from the database is in Excel format, choose the Excel option. After submitting the file format, a final dialogue box will appear. Please provide information about the file that you want to import. 
Please ensure that plot_name is set as the unique identifier. To finalize the process, clicking OK button. To import Trait Files, clicking on the Traits tab on the main menu of the Field Book App. Then, clicking on the three dots symbol found on the upper right corner of the Field Book screen. This will open a drop down menu with the choices Import and Export. Clicking on Import Clicking on import will open a new dialogue that displays a list of trait files that you can select to import to the Field Book App. The trait file is now imported into the Field Book App. The traits page will show all trait files and available traits. 12.6 Exporting Files from Field Book App Data that were collected on the Field Book App can be exported back to your tablet folder, which can then be transferred to your computer. To export files containing data from the Field Book App to your tablet, clicking on the Export link on the main menu page of the Field Book App. Clicking on the Export link will open a new dialogue window. To ensure that data are exported in a correct format for the database, checking the Database Format box, then clicking on OK button. The exported file can then be found in the field_export sub-folder within the fieldBook folder on your tablet. Once you connect your tablet to your computer, you can directly transfer the file to your computer. 12.7 Uploading Phenotype Files to an SGN database To upload phenotype files to the database, clicking on Field Book App in the Manage menu. On the Field Book Tools page, clicking on Upload link in the Uploaded Phenotype Files section. Clicking on the Upload link will open a new dialogue asking you to choose a file that you want to upload to the database website. Please ensure that plot_name is the first column of the file to be uploaded. To make sure that the file has the correct format for uploading, click on the Verify button. After the file format has been verified, click on the Store button. 
The list of uploaded phenotype files can be found on the Field Book Tools page The uploaded files will also be seen in the corresponding Trial Detail page. "],["managing-phenotypic-data.html", "Chapter 13 Managing Phenotypic Data 13.1 Uploading Fieldbook Phenotypes 13.2 Uploading Spreadsheet Phenotypes", " Chapter 13 Managing Phenotypic Data To facilitate uploading process for phenotypic data, Manage Phenotypic Data page provides two options for uploading: Field Book Phenotype file in database format and phenotype file in Excel (.xls or .xlsx) file format. To access Manage Phenotypic Data page, clicking on Phenotyping in the Manage menu. 13.1 Uploading Fieldbook Phenotypes 13.1.1 Export Field Book Database File The database upload of Field Book phenotype data relies on the Database format from the Field Book. Please make sure to export the Database format from the Field Book if you intend to upload the data using the Field Book Upload we describe below. If you prefer to use the Table format that the Field Book exports, you can modify this format to work with the Spreadsheet Upload we describe below. 13.1.2 Upload Field Book Database File To upload a Field Book Phenotype file in a database format, click the Upload Fieldbook link The Upload Fieldbook link on this page and Upload link on the Field Book Tools page open the same dialogue. Please follow instructions for uploading phenotypic files on the 12 page. 13.2 Uploading Spreadsheet Phenotypes To upload a phenotype file in an Excel (.xls or .xlsx) file format, click the Upload Spreadsheet link. Please specify Data Level (Plots or Plants) and select the Excel file that you want to upload. 13.2.1 Generating Spreadsheet File You can find more file format information by clicking on Spreadsheet Format link. Clicking on Spreadsheet Format will open the following dialog. 
Clicking on Create Phenotyping Spreadsheet will bring up a dialog where you can indicate the trial(s) you are interested in and the trait list you are interested in. Clicking Submit will download the xlsx file onto your computer, where you can then fill in the phenotypes. 13.2.2 Uploading Spreadsheet File To ensure that the file has a correct format for uploading, click on the Verify button. This will check the contents of the file and also perform quality checks on the values in the file. These checks include checking the trait definition for categorical values, minimum and maximum values, and data type checking. It will also check if there are already values uploaded for the given observation units and traits. If there are, there is an option to overwrite the existing values with the new values in your file. If the file is valid, only then can you click Store to store the information in the database. "],["managing-barcodes.html", "Chapter 14 Managing Barcodes", " Chapter 14 Managing Barcodes SGN databases provide tools for generating barcodes for stock identification. To access Barcode Tools page, clicking on Barcodes in the Manage menu. Barcode Tools page provides four options for generating barcodes: Single barcode Multiple barcodes Plot phenotyping barcodes Trial barcodes To generate single barcode, clicking on Generate Barcode link on the Barcode Tools page. In the Generate Barcode section, specify the name of the barcode, size of the barcode, then clicking on Generate Barcode The database will generate a barcode for your stock. The barcode can be printed for your stock identification. It also appears on its corresponding stock page. If you have a list of stocks that you want to generate barcodes, you can use Download Stock Barcodes section. 
You have three options for entering stock names: Typing in stock names, or copy and paste from other file into the box (1) Choosing a list of stocks from your Lists (2), and transferring the list into the box (1) by clicking on paste button. Uploading a Tab-delimited Text File with stock names. Select an optional printing format from the available formats. You can select printer settings that you prefer in the Printer Settings section. After you enter stock names and specify printer settings, clicking on Download Barcodes button at the bottom of the page. If you have a list of plots that you want to generate phenotyping barcodes, you can use Download Plot Phenotyping Barcodes section. You have three options for entering plot names: Typing in plot names, or copy and paste from other file into the box (1) Choosing a list of plots from your Lists (2), and transferring the list into the box (1) by clicking on paste button. Uploading a Tab-delimited Text File with plot names. If you have a list of trials that you want to generate barcodes, you can use Download Trial Barcodes section. You have three options for entering trial names: Typing in trial names, or copy and paste from other file into the box (1) Choosing a list of trials from your Lists (2), and transferring the list into the box (1) by clicking on paste button. Uploading a Tab-delimited Text File with trial names. "],["using-the-label-designer.html", "Chapter 15 Using the Label Designer", " Chapter 15 Using the Label Designer Breedbase provides an interactive design tool for creating custom labels. To access the Label Designer, click on Label Designer in the Manage menu. The following sections explain your many options as you advance through each step of the design workflow. 15.0.1 First Select a Datasource The first step is to select a data source. Since the label designer can generate labels for different data types, you can optionally filter the source selection by the data type youre interested in. 
Then, select a field, genotyping, or crossing trial to populate your labels with the trial design information. Or select a list to populate your label with the list contents. For data sources with multiple levels of information you will also be asked to pick a level (plot, plant, etc.) before proceeding. To generate plot-level labels for more than one trial at once, select a list of trials as the source and plot as the level. 15.0.2 Set Page and Label Size Now choose whether to create a new design or load a saved design. If you choose new, you will be prompted to select a page size and label size. If you do not see your page or label size as an option, then select Custom and enter your desired dimensions in pixels, or 1/72nds of an inch. If you choose saved, you will be prompted to select a saved design then will be taken directly to the design step with the saved design elements preloaded. 15.0.3 Design Your Label Below is a draw area where you can begin adding elements to your label. First select a type, then field, size, and font, then click Add You can add text to an existing field or create a completely custom field by clicking Create Custom Field Once added, you can drag and drop elements, or delete them by clicking on the red box in their upper left corners. Barcodes can also be resized by dragging on the green box in their lower right corners. If you are creating labels for a trial it is highly recommended to include a barcode encoding your plot, plant, or tissue sample names. These are your unique identifiers that will need to be included with any phenotypic or genotypic measurements loaded into the database. When you are satisfied with your design, click next! 15.0.4 Adjust Formatting, Save, and Download Last step! Here you can tweak your formatting and page layout, save your design, or download your labels. The additional settings dialog will allow you to fine tune the print margins and margins between labels. The units are pixels or 1/72nds of an inch. 
It's not recommended to change these until you've already done a test print. You can also set the # of copies per label, filter by rep, or download just the first page for test purposes. To save your design just type a unique name and hit save. This will save your design to your list manager where you can set it to public to share it with others. Finally if you are ready just hit download to generate and download your labels! "],["managing-downloads.html", "Chapter 16 Managing Downloads", " Chapter 16 Managing Downloads You can download phenotype, trial meta-data, pedigree, GBS genotype and GBS genotype QC files from the database to your computer by using Lists. To download, click on Download in the Manage menu. For each category, you can select a list of accessions from your Lists to download their phenotypes, pedigree, GBS genotype, GBS genotype QC. In the case of downloading trial meta-data, you would provide a list of trials, while for downloading phenotype and GBS genotype QC, you can also use a list of trials or traits. "],["managing-odk-data-collection.html", "Chapter 17 Managing ODK Data Collection 17.1 ONA Crossing Information", " Chapter 17 Managing ODK Data Collection To access this page go to Manage and then ODK Data Collection. ODK is used for remotely collecting data on Android and iOS devices. We currently are working to support two ODK service providers, namely ONA and SMAP. We are using ONA to collect crossing information, including all lab activities following seed production. We are using SMAP for phenotypic data collection. 17.1 ONA Crossing Information 17.1.1 Managing ONA Crossing Information To begin collecting data using the ONA ODK form you must first have a crossing plan in the form of a Cross Wishlist. To do this from this page, click the Export Cross Wishlist to ONA button. Please refer to the Create Cross Wishlist help section for more information. 
It is possible to view the current available cross wishlists by clicking the Export Cross Wishlist to ONA button and then clicking Available Cross Wishlists. Once your cross wishlist is available, you can use your mobile ODK application to record crosses being done realtime. You can also record all laboratory activities following seed extraction up to greenhouse plantlet hardening. As you collect data using your mobile ODK application, your responses will be synchronized with our database. The Schedule Import for Selected Form section gives you options to perform the import daily or more frequently. It is also possible to initiate a data import from ONA at anytime by clicking Import Crossing Data from Selected Form on ONA. 17.1.2 Reviewing Plant Status The mobile ODK application has options to collect information about the status of plants in the field, such as if they are flowering. Images for each plant can also be recorded. The database will report this information here in a summary table that looks like the following. Notice that images are also transferred to the database. 17.1.3 Graphical Summary For Performed Crosses There is a section to summarize activities done for each cross. In this table each row represents a single cross performed. All the activities that have been performed will be shown here, such as first pollination and embryo rescue. The scatter plot shown tracks seed numbers generated on the Y axis and date of activity on the X axis. 17.1.4 Summary Information For Performed Crosses There is a secondary section to summarize what has been done across the entire Cross Wishlist. This tree structure shows all activities performed for a cross and shows how these crosses relate to the Cross Wishlist. 
"],["managing-tissue-samples.html", "Chapter 18 Managing Tissue Samples 18.1 Tissue samples from field trials 18.2 Genotyping Plate Tissue Samples (96 or 384 well plates)", " Chapter 18 Managing Tissue Samples To access this page go to Manage and then Tissue Samples. 18.1 Tissue samples from field trials A field trial contains plots planted with a specific accession. Each plot can contain many plants, which in turn can contain many tissue samples. On the manage tissue sample page we can see the field trials that contain tissue samples already. We can choose to download the tissue sample layout as seen in the below picture. If the field trial you want to collect tissue samples from is not in the above table, you can click the button highlighted below. Once you have clicked this button, you will enter a workflow that begins with the following introduction. Once you click next, you will need to select your trial. Next, if your trial currently only has plot entries saved, you will be asked to enter how many plants are in each plot. Finally you will be asked how many tissue samples you want for each plant. You can specify a string to include in the tissue sample name, such as leaf or root. Afterwards you should see the following success message, indicating that the tissue samples are saved. 18.2 Genotyping Plate Tissue Samples (96 or 384 well plates) A genotyping plate represents a 96 or 384 well plate. You can use the Coordinate Android application to create your plate layout, or you can upload your own Excel plate layout, or you can use the database to generate a plate layout. Ideally, you will use tissue sample names originating from a field trial as the source for each well tissue sample, but you can also use plant names, plot names, or accession names. From the manage tissue samples page, you can see the genotyping plates saved in the database. You can also download the layouts as shown below. 
If you need to create a new genotyping plate, you can click the button shown below. This will guide you through a workflow for uploading or creating the new plate layout. Genotyping vendors require you to send a plate layout during submission. You can download the plate layout as shown above, or you can go to a genotyping plate detail page to download the Intertek formatted file. In the future you will be able to directly export your genotyping plate layout to vendors. "],["managing-observation-variables.html", "Chapter 19 Managing Observation Variables 19.1 Managing Observation Variables with Traits, Methods, and Scales", " Chapter 19 Managing Observation Variables 19.1 Managing Observation Variables with Traits, Methods, and Scales Observation variables are the identifiers used when collecting phenotypic data. An observation variable is composed of a trait, a method, and a scale. The trait describes the attribute being measured e.g. Plant Height. The method defines the protocol in which the trait was observed e.g. Using a one meter long measuring stick. The scale defines the units or dimensions for which the measurement was taken e.g. Meters. Generally, observation variables are defined in ontologies that are predefined. We often use ontologies from cropontology.org. In this case, you will not be able to define your own observation variables directly; instead, you will need to contact us and we will add the observation variable for you. For databases where the user has greater control, we have an interface to allow addition of observation variables, along with traits, methods, and scales. To begin, go to the Search->Traits page. If the database you are on allows you to directly add observation variables, you will see the following button at the bottom of the page. When you click the button, the following workflow will appear. You should be logged in or else it will not allow addition of the observation variable. The workflow begins with an introduction. 
On the next workflow step, you select the ontology that you want to insert the new observation variable into. You must also give a name and a definition for the new observation variable. On the next workflow step, you select the trait ontology to use. Once you select a trait ontology, a select containing all the terms in the selected ontology will appear. You can either select a trait or if it does not exist in the select, you can create a new one by giving a name and a definition for the new trait. On the next workflow step, you select the method ontology to use. Once you select a method ontology, a select containing all the terms in the selected ontology will appear. You can either select a method or if it does not exist in the select, you can create a new one by giving a name and a definition for the new method. On the next workflow step, you select the scale ontology to use. Once you select a scale ontology, a select containing all the terms in the selected ontology will appear. You can either select a scale or if it does not exist in the select, you can create a new one by giving a name and a definition for the new scale. You can also define a format, minimum, maximum, categories, and default value for the new scale. On the last page of the workflow, you confirm the submission. Afterwards, you can use the newly created observation variable ontology term in your phenotyping. "],["managing-image-data.html", "Chapter 20 Managing Image Data 20.1 Image-Phenotyping Dashboard 20.2 Image Input 20.3 Standard Process 20.4 Ground Control Points", " Chapter 20 Managing Image Data 20.1 Image-Phenotyping Dashboard Upload raw image-captures in a compressed file (.zip) for orthophotomosaic assembly or upload previously stitched orthophotomosaic raster (.PNG, .JPG) imagery. Dashboard shows all field trials and uploaded imaging events in collapsible sections. 
Follow standard processes to manually create templates for assignment of plot-polygon images to the field experiment design. All imagery is shown with the spectral category within collapsible sections. Figure shows NIR imagery. Apply Fourier transform filtering, thresholding, and vegetation index masking. Plot-polygon images for all image processes are shown. Extract and export phenotypic values from plot-polygon images for analyses and model training. 20.2 Image Input Clicking Upload Imagery will open the following dialog. Raw-captures can be uploaded in a compressed (.zip) file so that they can be assembled into an orthophotomosaic. If orthophotomosaic assembly is not required, raster images (.PNG, .JPG) can be uploaded. Example data is given for raw Micasense RedEdge 5-band multispectral captures and for stitched orthophotomosaics. To begin uploading images, a field trial must be selected. The field trial must already be saved in the database. For information about adding a field trial, please read the Field Trial documentation. The image data is added to an imaging (drone run) event. Here you can select a previously saved imaging event or you can create a new one by defining a name, description, and date. The uploaded data can be raw image-captures or complete raster images. Here you can select whether orthophotomosaic stitching is required. In the case that orthophotomosaic stitching is required, select yes. On the next step you will see the following: Upload a zipfile with the raw-captures. When uploading Micasense RedEdge raw-captures, provide images of the Micasense calibration panels in a zipfile as well. In the case that orthophotomosaic assembly is not required, simply upload the raster images. Select the number of image bands that will be uploaded e.g. for a five band multispectral camera, select 5. In the case that orthophotomosaic stitching is not required, select no. 
On the next step you will see the following: Upload an image at each band with a unique name, description, and spectral type. 20.3 Standard Process Once imagery is uploaded, it will appear on the dashboard under the field trial. Clicking the Run Standard Process button will begin extracting plot-polygon phenotypes from the imagery. Clicking the button will open the following dialog. Select a drone run band to use in this process. In the case of the Micasense 5 band multispectral camera there will be 5 bands shown here; select the NIR channel in this case because it has the highest contrast. In the case of standard color images, there will only be the RGB Color Image option here. Rotate the image so that the plots are oriented in a grid fashion. There can be a skew in the field layout, as seen in the following example. Perform a rough cropping of the image by clicking on the four corners of the field. Cropping is important to remove any extraneous parts of the image. This step shows a histogram of the cropped image. The standard process will magnitude threshold the top and low ends of the distribution. In this step, the template for the plot polygons in the experimental field design are associated to the image. First, define the number of rows and columns in the field experiment. Then click the four corners of the image, in respect to the top right, top left, bottom left, and bottom right positions. Next click on Draw Plot Polygon Template. Review the template and clear/repeat the process until the template matches well. It is possible to copy/paste templates in the case where there are large breaks in the field design. Next, scroll down to the assign Plot Polygons to Field Trial Entities section. Select the location of Plot Number 1 as either top left or top right and whether the field design is serpentine or zigzag. Click on Generate Assignments and review that the names of the plots appear correctly in the overlay on the image. 
Finally, click Finish and Save Polygons to Plots when you have confirmed the assignments. Next, the dialog shows you that the standard process will be repeated for all uploaded image bands. Next, choose which vegetation indices to apply. Next, choose the phenotypic values to extract. You must define the time point for which the phenotype is; if the field trial has a planting date, the time point will automatically be populated as image date minus the planting date. After completing the standard process, the job will continue in the background until it completes. You can check the status of the job from the dashboard. 20.4 Ground Control Points Ground control points can be saved after an imaging event has undergone the standard process on orthomosaics. Ground control points can then be used across imaging events on the same field experiment in order to automate the entire standard process. "],["managing-vcf-data.html", "Chapter 21 Managing VCF Data 21.1 Uploading VCF Data 21.2 Searching and Downloading VCF Data 21.3 Searching Protocols 21.4 Detail Pages and Deletion", " Chapter 21 Managing VCF Data 21.1 Uploading VCF Data Genotyping data in VCF can be loaded from the web-interface. Breedbase can store any genotypic variants from a VCF, allowing for polyploids, structural variants, etc. without problems. To begin go to Manage->Genotyping Plates and click the button seen below: Note that you do not need to have genotyping plates uploaded to upload VCF data; you may upload genotyping data to accessions or you can upload genotyping data for tissue samples in genotyping plates. The workflow begins with an intro: On the following step in the workflow, a genotyping project is defined or selected. A genotyping project is a high-level entity for grouping several genotyping events. It is defined with a name, description, breeding program, and genotyping facility (IGD, Intertek, etc.). The following step is to define or select a genotyping protocol. 
A genotyping protocol represents the set of markers being called against a specific reference genome. A genotyping protocol is defined with a name, description, reference genome name, species name, and a location of data generation. Note in the picture that you can select whether the samples in your file are accessions or tissue samples in the database; tissue samples are for when a genotyping plate is stored in the database. There is an option to parse the sample names for appended sequencing numbers from IGD, where the sample names are like accession:igdnumber. The final step is to select the VCF from your computer and upload it. The web interface can be used to upload arbitrarily large files; the maximum upload size is set in the NGINX configuration. 21.2 Searching and Downloading VCF Data The Search Wizard is the primary means of querying data in the database. Go to Search->Wizard to begin. Once genotyping protocols are stored, select Genotyping Protocols from the first dropdown menu. Then if you select one or more and select Accessions from the second dropdown menu, you will see the accessions for which genotypes were stored. As seen in the following picture, there is a section for filtering genotypes by chromosome, start position, and end position. Genotypes can be downloaded in VCF or DosageMatrix formats. Using the Default genotyping protocol which is configured in a system, you can query over field phenotypic evaluations before downloading genotypes and phenotypes. 21.3 Searching Protocols Genotyping protocols can be searched by going to Search->Genotyping Protocols. To download genotypes, accessions must be selected, though any combination of search criteria can be used to filter and select those accessions. If a genotyping protocol is not selected, then the default genotyping protocol set in the configuration will be used. Genotyping protocols can also be selected in the wizard. 
The genotyping download menu on the Search Wizard presents options for filtering by chromosome, start position, and end position. Genotypes can be downloaded in VCF or Dosage Matrix formats. The genomic relationship matrix (GRM) can be downloaded for the selected accessions in a tab-delimited matrix format or in a three-column format that is useful in Asreml. Genotypes can be computed from the parents in the pedigree if those parents are genotyped by clicking on the compute from parents checkbox. Additionally, the GRM can be computed using genotypes of parents in the pedigree if the compute from parents checkbox is selected. As is described elsewhere, the Search Wizard presents a way to filter phenotypic values by minimum and maximum values, and allows for download in CSV and Excel formats. 21.4 Detail Pages and Deletion The genotyping protocol detail page will show all information about the protocol such as the reference genome used, the header information lines in the uploaded VCF file, the markers involved, and the samples genotyped. The markers section will show all markers used and their annotations, such as position, chromosome, alternate allele, reference allele, marker format, etc. The samples section will show all samples genotyped. Notice the Download links in the table which can be used to easily get the VCF file results for each genotyped sample with all markers in the genotyping protocol. For getting multiple samples at once, use the Search Wizard as discussed above. The genotyping protocol and all associated genotyping data can be deleted from the genotyping protocol page. "],["managing-spectral-data.html", "Chapter 22 Managing Spectral Data 22.1 Upload Spectral Data 22.2 Evaluate and Remove Outliers 22.3 Plot Spectra 22.4 Aggregate Spectra 22.5 References", " Chapter 22 Managing Spectral Data Breedbase has implemented a flexible spectral data storage protocol that handles spectral data irrespective of the source spectrometer. 
Spectral data storage and analysis in Breedbase makes use of the R package waves for outlier identification, plotting, sample aggregation, and prediction model training. 22.1 Upload Spectral Data Spectral data can be added as a CSV file that includes metadata in the leftmost columns followed by one column per spectral measurement to the right. Rows represent a single scan or sample, each with a unique ID that must match to a Breedbase observationUnitName. Future data transfer using BrAPI will allow for interoperability with data collection software. To upload a spectral dataset, navigate to the Manage NIRS Data page by selecting NIRS in the Manage menu and click the blue Upload NIRS button. This will open an upload workflow. A link to the required file format and an example .csv file can be found by clicking in the light blue info box in this workflow. Another example of the file format is shown below. id: Optional identifier for each NIRS read. The id must be an integer. sampling_id: Optional identifier for each sample. Strings are allowed. sampling_date: Optional field. The format allowed is: YYYY-MM-DD. observationunit_name: Required field that matches existing data in the database. It can be the plot name, subplots, plant name, or tissue sample, depending on how your trial is designed. device_id: Optional field to identify your device. Strings are allowed. device_type: Required field. It is possible to upload data for a single device type. They can be: SCiO, QST, Foss6500, BunchiN500, or LinkSquare. comments: Optional field for general comments. All other columns are required wavelengths. You can upload as many wavelength columns as you want; there is no limit. 22.2 Evaluate and Remove Outliers Spectral calibration models can be heavily affected by the presence of outliers, whether they come from spectrometer spectral artifacts or user errors. 
Mahalanobis distance (Mahalanobis, 1936) is a measure of the distance between a single observation and a larger distribution and is commonly used in the identification of outliers in a multivariate space (De Maesschalck et al., 2000). The FilterSpectra() function in the R package waves calculates the Mahalanobis distance of each observation in a given spectral matrix using the stats::mahalanobis() function. Observations are identified as outliers if the squared distance is greater than the 95th percentile of a \\(\\chi^2\\)-distribution with p degrees of freedom, where p is the number of columns (wavelengths) in the spectral matrix (Johnson and Wichern, 2007). In Breedbase, this procedure is applied on a per-dataset basis on upload and outliers are given binary tags Outlier. 22.3 Plot Spectra After outlier identification, a plot is generated using the PlotSpectra() function in waves. This function uses the filtered spectra and ggplot2::ggplot() to create a line plot with outliers highlighted by color. A list of rows identified as outliers is shown beneath the plot. Plots are saved as .png files and linked to the original input datasets. Plot image files can be downloaded via the Download Plot button in the upload workflow. 22.4 Aggregate Spectra To obtain a stable and reliable spectral profile, most spectrometer manufacturers recommend that multiple spectral scans are captured for each sample. While some spectrometers aggregate these scans internally, many do not, requiring the user to do so before analysis can take place. Breedbase handles these cases upon data upload following filtering steps by calling the AggregateSpectra() function from waves, saving the aggregated scans for future access through the search wizard feature. Scans are aggregated by sample mean (e.g. plot-level basis) according to the provided observationUnitName field. After aggregation, the user exits the upload workflow and the raw data file is saved in the upload archive. 
22.5 References De Maesschalck, R., Jouan-Rimbaud, D., and Massart, D. L. (2000). The Mahalanobis distance. Chemom. Intell. Lab. Syst. 50(1): 1-18. Johnson, R. A. & Wichern, D. W. (2007). Applied Multivariate Statistical Analysis (6th Edition). p 773. Mahalanobis, P. C. (1936). On the generalized distance in statistics. National Institute of Science of India. Analysis tool documentation "],["managing-sequence-metadata.html", "Chapter 23 Managing Sequence Metadata 23.1 What is Sequence Metadata? 23.2 Loading Sequence Metadata 23.3 Searching Sequence Metadata 23.4 Marker Integration 23.5 Sequence Metadata API", " Chapter 23 Managing Sequence Metadata 23.1 What is Sequence Metadata? Sequence Metadata is a feature that allows for the efficient storage and retrieval of sequence annotations for a specific region along a reference genome. The annotation data can contain a primary score value and any number of secondary key/value attribute data. For example, Sequence Metadata can store MNase open chromatin scores for every 10 basepairs along the reference genome as well as genome-wide association study (GWAS) statistics, including the trait information associated with the result. This data can then be filtered by position and/or scores/attribute values and even cross-referenced with markers stored in the database. 23.2 Loading Sequence Metadata Sequence Metadata can be loaded into the database using a gff3-formatted file. The following columns are used to load the data: #1 / seqid: The name of the database feature (i.e. chromosome) the metadata is associated with (The feature name must already exist as a feature in the database) #4 / start: The metadata's start position #5 / end: The metadata's end position #6 / score: (optional) The primary score attribute of the metadata #9 / attributes: (optional) Secondary key//value attributes to be saved with the score. These should be formatted using the gff3 standard (key1=value1;key2=value2). 
The attribute key cannot be either score, start, or end. To upload the gff3 file: Go to the Manage > Sequence Metadata page Click the Upload Sequence Metadata button On Step 2 of the Wizard, select the Type of data to be uploaded This groups similar datasets together in the same Data Type category On Step 3 of the Wizard, select an existing Protocol or create a new one The Protocol is used to describe how the data was generated and define the score value and any secondary attributes. Adding the attributes (and their descriptions) to the Protocol will allow the Sequence Metadata queries to filter the data based on the value of one or more of these attributes. Attributes not defined in the Protocol will still be stored and displayed on retrieval, but will not be able to be used in a search filter. Finally, select and upload your gff3 file to the database. The database will verify the format of the file before its contents are stored. 23.3 Searching Sequence Metadata To retrieve stored Sequence Metadata, go to the Search > Sequence Metadata page. 23.3.1 Basic Search The basic Sequence Metadata search options include selecting the reference genome and species, the chromosome, and (optionally) the start and/or end position(s) along the reference genome. In addition, one or more specific protocols can be selected to limit the results. The Sequence Metadata search results are returned as a table, including the chromosome and start/stop positions of the annotation, along with the primary score value and any additional key/value attributes. The markers column will include a list of marker names of any stored markers that are found within the start/stop positions of the Sequence Metadata. The data can be downloaded as a table in an Excel or CSV file or a machine-readable (code-friendly) JSON file. If the Sequence Metadata JBrowse configuration is set, the filtered results can be displayed as a dynamic JBrowse track. 
23.3.2 Advanced Search Any number of advanced search filters can be applied to the query. The advanced filters can limit the search results by the value of the primary score and/or any of the secondary attribute values. 23.4 Marker Integration A table of Sequence Metadata annotations is embedded on the Marker/Variant detail page. The table will include any annotations that span the position of the marker (for data of the same reference genome and species). 23.5 Sequence Metadata API A publicly accessible RESTful API (Application Programming Interface) is available to query the database for Sequence Metadata directly from your programming environment (R, python, etc) to be used in analysis. The data is returned in a JSON format. Documentation for the API can be found on the Manage > Sequence Metadata page "],["managing-outliers-in-dataset.html", "Chapter 24 Managing Outliers in Dataset 24.1 What is Outliers Functionality in Dataset ? 24.2 Accessing Trait Visualization 24.3 Interpreting Visual Elements 24.4 Choosing Cut-Off Values 24.5 Setting Deviation Multiplier 24.6 Utilizing Graph Controls", " Chapter 24 Managing Outliers in Dataset 24.1 What is Outliers Functionality in Dataset ? As in step The Search Wizard we can create a dataset. The dataset incorporates a feature to identify outlier points, which we may choose to exclude from a specific dataset. It's important to note that these exclusions only apply at the dataset level, and no data is permanently removed from the database. Additionally, outlier categorization can be modified at any time, and these changes are visible to all other functionalities within the system. Each dataset stores a wholly unique set of outlier points, completely independent of any other dataset in the database. Outliers are specifically designated for traits within datasets, exclusively encompassing phenotype data. If a particular dataset lacks traits as a part of wizard selection, this functionality is not available. 
Each trait has its own set of defined outliers. 24.2 Accessing Trait Visualization Once you've selected a specific trait, the web application provides access to a visualization of the data points associated with that trait. 24.3 Interpreting Visual Elements Once you've selected a specific trait, the web application provides access to a visualization of the data points associated with that trait. Green Points: As per the legend, represent values for the selected trait that fall below the cut-off point set by the slider. (non-outliers) Black Outlined Points: These data points are outlined with black borders, indicating that they are currently designated as outliers in the database. Red Points: The red data points denote the cut-off points established by the slider for the allowable deviation value. 24.4 Choosing Cut-Off Values You have two fundamental options for setting cut-off points: Median with MAD: This option involves using the median (middle value) along with the Mean Absolute Deviation (MAD) as a reference point for determining cut-off values. Mean with Standard Deviation: Alternatively, you can choose to use the mean (average) in conjunction with the Standard Deviation to set cut-off points. 24.5 Setting Deviation Multiplier The slider allows you to specify the deviation multiplier from a central point, which influences the cut-off values. 24.6 Utilizing Graph Controls Beneath the graph, you'll find four buttons, each serving a distinct function: Add selection to outliers: This button enables you to save the current cut-off points to the database for future reference. Reset outliers for current trait: You can use this option to reset outliers for the selected trait. Reset all outliers: This button allows you to reset outliers for the entire dataset. Download Phenotype Table without outliers: You can download the phenotype data table in a comma-separated value format file, using this feature, with outliers excluded for selected dataset. 
These tools and functions are designed to provide you with control and insights when working with data visualization and outliers. "],["data-analysis-tools.html", "Chapter 25 Data Analysis Tools 25.1 Selection Index 25.2 Genomic Selection 25.3 Genome Browsing 25.4 Principal Component Analysis (PCA) 25.5 ANOVA 25.6 Clustering (K-Means, Hierarchical) 25.7 Genetic Gain 25.8 Kinship and Inbreeding Coefficients 25.9 Creating Crossing Groups 25.10 Search Wizard Genomic Relationship Matrix (GRM) Download 25.11 Search Wizard Genome Wide Association Study (GWAS) 25.12 Spectral Analysis 25.13 General Mixed Model Tool 25.14 Genomic Prediction of Cross Performance (GCPC)", " Chapter 25 Data Analysis Tools SGN databases provide several tools for phenotype data analysis, marker-assisted selection, sequence and expression analyses, as well as an ontology browser. These tools can be found in the Analyze menu. 25.1 Selection Index To determine rankings of accessions based on more than one desirable trait, SGN databases provide a Selection Index tool that allows you to specify a weighting on each trait. To access the tool, click on Selection Index in the Analyze menu. On the Selection Index page, select a trial that you want to analyze. After you have selected a trial, you can find traits that were assayed in that trial in the Trait box. Selecting a trait that you want to include in the analysis will open a new dialogue showing the selected trait and a box in which you can assign a Weight to that trait. After you are done, you can continue by selecting another trait by clicking on the Add another trait link. After you have selected another trait, this page will automatically update information for you by showing all of the traits that you selected for the analysis. You also have options to choose a reference accession, to include accessions with missing phenotypes, and to scale values to a reference accession. 
After you complete your settings, click on Calculate Rankings. The Selection Index tool will generate rankings of accessions based on the information that you specified. You can copy the results to your system clipboard, convert the table data to CSV format, or print the data. Clicking on Raw Average will display average values of the phenotypes of those ranked accessions. Selection Index tool also allows you to save top ranked accessions directly to Lists. You can retrieve top ranked accessions by selecting a number or a percent. 25.2 Genomic Selection The prediction of breeding values for a trait is a one step or two steps process, depending on what stage in your breeding cycle you are. The first step is to build a prediction model for a trait using a training population of clones with phenotype and genotype data. If you have yet to select parents for crossing for your first cycle of selection you can use the breeding values of the training population. If you are at later stages of your selection program, you need to do the second step which is applying the prediction model on your selection population. All clones in your training and selection populations must exist in the database. To use the genomic selection tool, on cassavabase.org, select Genomic Selection from the analyze pull-down menu. There are three ways to build a model for a trait. 25.2.1 Building a Model - Method 1: One way to build a model is, using a trait name, to search for trials in which the trait was phenotyped and use a trial or a combination of trials to build a model for the trait. For example, if you search for mosaic disease severity, you will get a list of trials you can use as training populations. You will get a list of trials (as shown below) in which the trait of interest was phenotyped. From the list, you can use a single trial as a training population or combine several trials to form a training population for the prediction model of the trait.
Let's say, you want to create a training population using individuals from trials cassava ibadan 2001/02 and cassava ibadan 02/03 and build a model for cassava mosaic disease severity using all clones from the training population. Select the trials to combine (the same coloured), click done selecting, click the combine trials and build model button, and you will get a model and its output for the trait. On the model detail page, you can view the description of input data used in the model, output from the model and a search interface for selection populations to which you can apply the model to predict their breeding values. The description of the input data for the model includes the number of phenotyped clones, and the number of markers, scatter and frequency distribution plots for the phenotype data, relationship between the phenotype data and GEBVs, population structure. The model output includes model parameters, heritability of the trait, prediction accuracy, GEBVs of the individuals from the training population and marker effects. Expand each section to see detailed information. If you expand the Trait phenotype data section, you will find plots to explore the phenotype data used in the model. You can assess the phenotype data using scatter and histogram plots and the descriptive statistics. A regression line between observed phenotypes and GEBVs shows the relationship between the two. You can also explore if there is any sub-clustering in the training population using PCA. To check the model accuracy, a 10-fold cross-validation test, expand the model accuracy section. Marker effects are also available for download. To do so, expand the Marker Effects section and click the Download all marker effects link and you will get a tab delimited output to save on your computer. The breeding values of the individuals used in the training population are displayed graphically. Mousing over each data point displays the clone and its breeding value.
To examine better, you can zoom into the plot by selecting an area on the plot. You can download them also by following the Download all GEBVs link. Estimating breeding values in a selection population If you already have a selection population (in the database), from the same model page, you can apply the model to the selection population and estimate breeding values for all the clones in the population. You can search for a selection population of clones in the database using the search interface or you can make a custom list of clones using the list interface. If you click the search for all relevant selection populations, you will see all relevant selection populations for that model. However, this option takes a long time because of the large set of populations in the database and the filtering. Therefore, the fastest way is to search for each of your selection populations by name. If you are logged in to the website you will also see a list of your custom set of genotyped clones. To apply the model to a selection population, simply click your population name or Predict Now and you will get the predicted breeding values. When you see the name (or acronym) of the trait, follow the link and you will see an interactive plot of the breeding values and a link to download the breeding values of your selection population. 25.2.2 Building a Model - Method 2 Another way to build a model is by selecting a trial, instead of selecting and searching for a specific trait. This approach is useful when you know a particular trial that is relevant to the environment you are targeting to breed material for. This method allows you to build models and predict genomic estimated breeding values (GEBVs) for several traits within a single trial at once. You can also calculate selection index for your clones when GEBVs are estimated for multiple traits. To do this select the Genomic Selection link found under the analyze menu.
This will take you to the same home page as used with Method 1. However, instead of entering information to search for in Search for a trait, click on Use a trait as a trial population. This will expand a new menu that will show all available trials. To begin creating the model, select the existing trial that you would like to use. In this example I will be using the trial and trait data from Cassava Ibadan 2002/03 trial. Clicking on a trial will take you to a page where you can find information such as number of markers and number of phenotypes clones. In addition to the number of phenotype clones and number of markers, the main page for the trial selected also has information and graphs on phenotypic correlation for all of the traits. By moving your cursor over the graph you can read the different values for correlation between two traits. A key with all of the trait names of the acronyms used can be found in the tab below the graph. Below the Training population summary there is a tab for Traits. Clicking on this tab will show all available traits for the specific trial. You can create a model by choosing one or multiple traits in the trial and clicking Build Model. In this example, the traits for cassava bacterial blight severity and cassava mosaic disease severity have been selected. Clicking on Build Model will take you to a new page with the models outputs for the traits. Under the Genomic Selection Model Output tab you can view the model output and the model accuracy. Clicking on any of the traits will take you to a page with information about the model output on that individual trait within the trial. There you can view all of the trait information that was seen in more detail in Method 1. You can apply the models to simultaneously predict GEBVs for respective traits in a selection population by clicking on Predict Now or the name of the selection population. 
You can also apply the models to any set of genotyped clones that you can create using the lists feature. For more information on lists, click here. Follow the link to the trait name to view and download the predicted GEBVs for the trait in a selection population. To compare clones based on their performance on multiple traits, you can calculate selection indices using the form below. Choose from the pulldown menu the population with predicted GEBVs for the traits and assign relative weights for each trait. The relative weight of each trait must be between 0 - 1. 0 being of least weight and importance, not wanting to consider that particular trait in selecting a genotype and 1 being a trait that you give highest importance. In this example we will be using the Cassava Ibadan 2002/03 population and assigning values to each of the traits. Remember that there is a list of acronyms and trait names at the bottom of the page for reference. After entering whatever values you would like for each trait click on the Calculate button to generate results. This will create a list of the top 10 genotypes that most closely match the criteria that you entered. The list will be displayed right below the selection index tab. This information can also be downloaded onto your computer by clicking on the Download selection indices link underneath the listed genotypes and selection indices. 25.2.3 Building a Model - Method 3 In addition to creating a model by searching for pre-existing traits or by preexisting trial name, models can also be created by using your own list of clones. This creates a model by using or creating a training population. The page to use the third Method for creating a population model is the same as for the other two models. Select Genomic Selection from under the analyze menu of the main toolbar. This will take you to the Genomic Selection homepage and show you all three available methods to create a model. 
To see and use Method 3 scroll down and click on the tab labeled Create a Training Population. This will open a set of tools that will allow you to use pre-existing lists or to create a new list. Once the Create a Training Population tab is opened you have the option to use a pre-existing list or create a new one. To learn how to create a list, click here. The Make a new list of plots link will take you directly to the Search Wizard that is usually used to create lists. Please note: the only lists that can be used in Method 3 to create a model are lists of plots and trials. If the pre-existing list is not of plots or trials (for example, traits, or locations) it will not show up and cannot be used as a training population. When you use a list of trials, the trials data will be combined to create a training data set. To use your custom list of plots or trials as a training population, select the list and click Go. This will take you to a detail page for the training population. From here on you can build models and predict breeding values as described in Method 2. 25.3 Genome Browsing There are two ways to evaluate genotype information within the browser, from an accession detail page or a trial detail page. 25.3.1 Browsing Genotype data by Accession If you are interested in browsing genotype information for a single accession, for example BAHKYEHEMAA, navigate to the accession detail page. Near the bottom of the detail page is a collapsible section called Accession Jbrowse. This section will contain a link to the accession jbrowse page if the necessary genotype data is available. Clicking the link should take you to a page that looks like this, at which point you can browse the genotype data in the form of a vcf track aligned to the latest build of the genome. 25.3.2 Browsing Genotype data by Trial If you are interested in browsing genotype information for the accessions within a given trial, navigate to the trial detail page.
Halfway down the page is a collapsible section called Trial Jbrowse. This section will contain a link to the trial jbrowse page if the necessary genotype data for at least two accessions planted in the trial is available. Clicking the link should take you to a page that looks like this, at which point you can browse the genotype data in the form of vcf tracks aligned to the latest build of the genome. 25.4 Principal Component Analysis (PCA) Principal component analysis helps estimate and visualize if there is sub-grouping of individuals within a dataset based on a number of variables. Currently, you can use marker data to run PCA on datasets. You can run PCA from multiple places on the website. To do PCA on individuals from a trial, go to the trial detail page and find the PCA tool under the Analysis tools section. individuals from a training population you used in a GS modeling, do your modeling and find the PCA tool in the model output page. individuals in a training population and selection population you applied the training model, do your modeling, apply the model on the selection population and find the PCA tool on the selection population prediction output page. individuals in a list of accessions you created, for example using the search wizard, go to the Analyze menu and select the Population Structure, select your list of individuals and run PCA. individuals from multiple trials, create a list of the trials using the search wizard, go to the Analyze menu and select the Population Structure, select your list of trials and run PCA. With all the options, you will get an interactive plot of the two PCs (shown below) that explain the largest variance. Point the cursor at any data point and you will see the individual name with its corresponding PCs scores. By clicking the Download all PCs, you can also download the 10 PCs scores in the text format. 25.5 ANOVA Currently, ANOVA is implemented for a single trial (single year and single location).
You can do ANOVA for RCBD, CRD, Alpha and Augmented trial designs. ANOVA is done using a linear mixed effects model, where genotype is a fixed effect and the replications and blocks are random effects. Fixed effect significance level is computed using lmer from the lmerTest R package. You can do ANOVA from two places: trial detail and training population detail. In both cases, if the phenotype data was from the supported trial designs, Go to the ANOVA section down in the trial or training population page Select the trait you want to perform ANOVA on Click the Run ANOVA and wait for the result 25.6 Clustering (K-Means, Hierarchical) The K-Means method allows you to partition a dataset into groups (K number). The hierarchical clustering, agglomerative, allows you to explore underlying similarity and visualize in a tree structure (dendrogram) the different levels of similarities (clusters) among samples. You can do clustering based on marker data, phenotype data and GEBVs. When you use phenotype data, first clone averages for each trait are calculated. Both methods use Euclidean distance as a measure of similarity. For the hierarchical clustering, the complete-linkage (farthest neighbour) method is used to link up clusters. There are three pathways to using this tool. When you have data in the form of a list or dataset from the search wizard: go to the Analyze menu and select the clustering option make sure you are logged in Select the relevant genotyping protocol, if you are clustering using genotype data select your list or dataset, click Go select clustering type select the data type to use If you are running K-Means clustering, provide the number of partitions (K). If left blank it will partition the data set into optimal numbers for the dataset. click the Run Cluster and wait for the analysis to finish or queue the request and wait for an email with the analysis result. You can download the outputs following the download links.
From the trial detail page: Go to the Analysis Tools section Follow steps D to G in (1) In the solGS pipeline: Once you are in a model output page, you will see a section where you can do clustering in the same way as above (option 2). K-Means clustering: Hierarchical clustering: 25.7 Genetic Gain You can check for genetic gain by comparing the GEBVs of a training and a selection population. You can do this in the solGS pipeline once you build a model and apply the model to predict the GEBVs of a selection population. Once at that stage, you will see a section Check Genetic Gain. Select a selection population to compare with the training population and click the Check Genetic Gain button. The genetic gain will be visualized in boxplots. You can download the boxplot(s) as well as the GEBVs data used for the plot(s). 25.8 Kinship and Inbreeding Coefficients This tool allows you to estimate genetic relatedness between a pair of individuals (kinship), homozygosity across loci in an individual (inbreeding coefficient), and genetic similarity of an individual relative to the rest of the population (average kinship). There are three pathways to using this tool. (1) When you have a list or dataset of clones, created from the search wizard: go to the Analyze menu and select the kinship and inbreeding make sure you are logged in Select the genotypic protocol for the marker data select your list or dataset of clones, click Go click the Run Kinship and wait for the analysis to finish, depending on the data size this may take minutes. You can choose to submit the analysis and wait for an email notice to view the results or wait for it to complete. You can download the output following the download links. (2) From the trial detail page: Go to the Analysis Tools section Follow steps C to G in (1) (3) In the solGS pipeline: Once you are in a model output page, scroll down to the Kinship and Inbreeding section and run kinship.
25.9 Creating Crossing Groups If you calculate selection index based on GEBVs of multiple traits, and you want to select a certain proportion of the indexed individuals (e.g.top 10%, or bottom 10%) and then you want to partition the selected individuals into a number of groups based on their genotypes, you can use the k-means clustering method. The procedure is: predict GEBVs for 2 or more traits In the models output page, calculate selection indices. Note the name of the selection index data. Go to the clustering section, select the selection index data, select K-means, select Genotype, in the K-numbers textbox, fill in the number of groups you want to create, in the selection proportion textbox, fill in the proportion of the indexed individuals you want to select, e.g.for the top 15 percent, 15. if you wish to select bottom performing, prefix the number with minus sign (e.g.-15) then run cluster and wait for the result. 25.10 Search Wizard Genomic Relationship Matrix (GRM) Download The genomic relationship matrix (GRM) is useful for understanding underlying structure in your population. Breedbase can compute the GRM using rrBLUP. First, select accessions in the search wizard and optionally select a genotyping protocol. If no genotyping protocol is selected, the default genotyping protocol in your system is used (as defined in sgn_local.conf). Specify the minor allele frequency, missing marker data, and missing individuals data filters to apply. The GRM can be returned in a matrix format (.tsv) which shows all pairwise relationships between the selected accessions and is useful for visualization; alternatively, the GRM can be returned in a three-column format (.tsv) which is useful for programs like ASReml outside of Breedbase. The GRM can also be returned as a simple correlation heatmap image (.pdf). 
The GRM can be computed from parents of the selected accessions granted the parents were genotyped, by clicking the checkbox compute from parents; this is useful for programs where parental lines are genotyped and then hybrids are created and evaluated in the field. 25.11 Search Wizard Genome Wide Association Study (GWAS) Performing a genome wide association study (GWAS) can determine genotypic markers which are significantly correlated to phenotypic traits. Breedbase can compute GWAS using rrBLUP. First, select accessions and trait(s) in the search wizard, and optionally select a genotyping protocol. If no genotyping protocol is selected, the default genotyping protocol in your system is used (as defined in sgn_local.conf). Several traits can be selected in the search wizard; if the traits are not to be treated as repeated measurements then select no in the select box and this will tell Breedbase to return GWAS results independently for the selected traits. If the selected traits are indeed all repeated measurements then select yes in the select box and Breedbase will return as single GWAS analysis across all the phenotypic records. Specify the minor allele frequency, missing marker data, and missing individuals data filters to apply. GWAS results can be returned in a tabular format (.tsv) where the -log10(p-values) for the selected traits are returned; alternatively, the GWAS results can be returned as Manhattan and QQ plots for the selected traits. The GWAS can be computed from parents of the selected accessions granted the parents were genotyped, by clicking the checkbox compute from parents; this is useful for programs where parental lines are genotyped and then hybrids are created and evaluated in the field. The GWAS will filter the data by the input MAF and missing data filters provided. After filtering the data is imputed using an EM method in rrBLUP. The Kinship matrix (GRM) is computed from the imputed genotypic data and used in the GWAS model. 
The GWAS uses fixed effects for different field trials and replicates in the phenotypic data. 25.12 Spectral Analysis Visible and near-infrared spectroscopy (vis-NIRS) can be related to reference phenotypes through statistical models to produce accurate phenotypic predictions for unobserved samples, increasing phenotyping throughput. This technique is commonly used for predicting traits such as total starch, protein, carotenoid, and water content in many plant breeding programs. Breedbase implements the R package waves to offer training, evaluation, storage, and use of vis-NIRS prediction models for a wide range of spectrometers and phenotypes. 25.12.1 Dataset selection In order to initiate an analysis, the user must select one or more datasets using 2.1. A dataset in Breedbase can contain observationUnit-level (plot-, plant-, or sample-level) trial metadata and phenotypic data from one or more trials. After navigating to the NIRS webpage under the Manage tab in Breedbase, the user can initiate an analysis and select one of these datasets as input for model training. An optional test dataset can be selected in the second step of the workflow. 25.12.2 Cross-validation Five cross-validation schemes that represent scenarios common in plant breeding are available for this analysis. These include CV1, CV2, CV0, and CV00 as outlined below and described in depth by Jarquín et al. (2017) as well as random and stratified random sampling with a 70% training and 30% validation split. For those schemes from Jarquín et al. (2017), specific input datasets must be chosen based on genotype and environment relatedness.
Cross-validation choices: * Random sampling (70% training / 30% validation) * Stratified random sampling, stratified based on phenotype (70% training / 30% validation) * CV1, untested lines in tested environments * CV2, tested lines in tested environments * CV0, tested lines in untested environments * CV00, untested lines in untested environments 25.12.3 Preprocessing Preprocessing, also known as pretreatment, is often used to increase the signal to noise ratio in vis-NIR datasets. The waves function DoPreprocessing() applies functions from the stats and prospectr packages for common spectral preprocessing methods with the following options: * Raw data (default) * First derivative * Second derivative * Gap segment derivative * Standard normal variate (SNV; Barnes et al., 1989) * Savitzky-Golay polynomial smoothing (Savitzky and Golay, 1964) For more information on preprocessing methods and implementation, see the waves manual, available through CRAN: waves.pdf 25.12.4 Algorithms Several algorithms are available for calibration model development in Breedbase via the waves package. The TrainSpectralModel() function in waves performs hyperparameter tuning as applicable using these algorithms in combination with cross validation and train functions from the package caret. Currently, only regression algorithms are available, but classification algorithms such as PLS-DA and SVM classification are under development. * Partial least squares regression (PLSR; Wold et al., 1982; Wold et al., 1984) is a popular method for spectral calibrations, as it can handle datasets with high levels of collinearity, reducing the dimensionality of these data into orthogonal latent variables (components) that are then related to the response variable through a linear model (reviewed in Wold et al., 2001). To avoid overfitting, the number of these components included in the final model must be tuned for each use case. The PLSR algorithm from the pls package is implemented by waves.
* Random Forest regression (RF; Ho, 1995) is a machine learning algorithm based on a series of decision trees. The number of trees and decisions at each junction are hyperparameters that must be tuned for each model. Another feature of this algorithm is the ability to extract variable importance measures from a fitted model (Breiman, 2001). In Breedbase, this option is made available through implementation of the RF algorithm from the package randomForest in the waves function TrainSpectralModel(). This function outputs both model performance statistics and a downloadable table of importance values for each wavelength. It is worth noting that this algorithm is computationally intensive, so the user should not be alarmed if results do not come right away. Breedbase will continue to work in the background and will display results when the analysis is finished. * Support vector machine regression (SVM; Vapnik, 2000) is another useful algorithm for working with high-dimension datasets consisting of non-linear data, with applications in both classification and regression. The package waves implements SVM with both linear and radial basis function kernels using the kernlab package. 25.12.5 Output: common model summary statistics After training, model performance statistics are both displayed on a results webpage and made available for download in .csv format. These statistics are calculated by the TrainSpectralModel() function in waves using the caret and spectacles packages. 
Reported statistics include: * Tuned parameters depending on the model algorithm * Best.n.comp, the best number of components to be included in a PLSR model * Best.ntree, the best number of trees in an RF model * Best.mtry, the best number of variables to include at every decision point in an RF model * RMSECV, the root mean squared error of cross-validation * R2cv, the coefficient of multiple determination of cross-validation for PLSR models * RMSEP, the root mean squared error of prediction * R2p, the squared Pearson's correlation between predicted and observed test set values * RPD, the ratio of standard deviation of observed test set values to RMSEP * RPIQ, the ratio of performance to interquartile distance * CCC, the concordance correlation coefficient * Bias, the average difference between the predicted and observed values * SEP, the standard error of prediction * R2sp, the squared Spearman's rank correlation between predicted and observed test set values 25.12.6 Export model for later use Once a model has been trained, it can be stored for later use. This action calls the SaveModel() function from waves. Metadata regarding the training dataset and other parameters specified by the user upon training initialization are stored alongside the model object itself in the database. 25.12.7 Predict phenotypes from an exported model (routine use) For phenotype predictions, users select a dataset and can then choose from models in the database that were trained using the same spectrometer type as the spectral data in the chosen dataset. Predicted phenotypes are stored as such in the database and are tagged with an ontology term specifying that they are predicted and not directly measured. Metadata regarding the model used for prediction is stored alongside the predicted value in the database. Predicted phenotypes can then be used as normal in other Breedbase analysis tools such as the Selection Index and GWAS.
25.12.8 FAQ The Breedbase Spectral Analysis Tool does not allow for prediction models involving data from multiple spectrometer types at once. References * Barnes, R.J., M.S. Dhanoa, and S.J. Lister. 1989. Standard normal variate transformation and de-trending of near-infrared diffuse reflectance spectra. Appl. Spectrosc. 43(5): 772-777. doi: 10.1366/0003702894202201. * Breiman, L. 2001. Random forests. Mach. Learn. 45: 5-32. doi: 10.1201/9780429469275-8. * Ho, T.K. 1995. Random decision forests. Proc. Int. Conf. Doc. Anal. Recognition, ICDAR 1: 278-282. doi: 10.1109/ICDAR.1995.598994. * Jarquín, D., C. Lemes da Silva, R.C. Gaynor, J. Poland, A. Fritz, et al. 2017. Increasing Genomic-Enabled Prediction Accuracy by Modeling Genotype x Environment Interactions in Kansas Wheat. Plant Genome 10(2): plantgenome2016.12.0130. doi: 10.3835/plantgenome2016.12.0130. * Johnson, R.A., and D.W. Wichern. 2007. Applied Multivariate Statistical Analysis (6th Edition). * De Maesschalck, R., D. Jouan-Rimbaud, and D.L. Massart. 2000. The Mahalanobis distance. Chemom. Intell. Lab. Syst. 50(1): 1-18. doi: 10.1016/S0169-7439(99)00047-7. * Mahalanobis, P.C. 1936. On the generalized distance in statistics. Natl. Inst. Sci. India. * Savitzky, A., and M.J.E. Golay. 1964. Smoothing and Differentiation of Data by Simplified Least Squares Procedures. Anal. Chem. 36(8): 1627-1639. doi: 10.1021/ac60214a047. * Shrestha, R., L. Matteis, M. Skofic, A. Portugal, G. McLaren, et al. 2012. Bridging the phenotypic and genetic data useful for integrated breeding through a data annotation using the Crop Ontology developed by the crop communities of practice. Front. Physiol. 3 AUG(August): 1-10. doi: 10.3389/fphys.2012.00326. * Vapnik, V.N. 2000. The Nature of Statistical Learning Theory. Springer New York, New York, NY. * Wold, S., A. Ruhe, H. Wold, and W.J. Dunn, III. 1984. The Collinearity Problem in Linear Regression. The Partial Least Squares (PLS) Approach to Generalized Inverses. SIAM J. Sci. Stat.
Comput. 5(3): 735-743. doi: 10.1137/0905052. * Wold, S., M. Sjöström, and L. Eriksson. 2001. PLS-regression: a basic tool of chemometrics. Chemom. Intell. Lab. Syst. 58(2): 109-130. doi: 10.1016/S0169-7439(01)00155-1. 25.13 General Mixed Model Tool The general mixed model tool is available at /tools/mixedmodels and a link is provided from the Analyze menu. To use the mixed model tool, first create a dataset using the Wizard containing the data that you would like to analyze. Select the Mixed Model tool from the Analyze menu. You are presented with a workflow. On the first step of the workflow, select the dataset that you wish to analyze, click on Choose dataset to continue. The second part of the workflow presents you with the traits in the dataset; you can select one or more traits from the lists using the select buttons. If you selected one trait, a bargraph of the trait distribution will be shown. Click the Next step button to move to the next screen. On the model build screen, all the factors are displayed that are contained within the dataset. The factors are presented as a list of blue buttons that can be dragged using the mouse to areas on the screen which build a mixed model equation. The areas correspond to fixed factors, random factors, and optionally to more complex factors, such as fixed factors with interaction and fixed factors with variable slope/intercepts. Drag the available factors to the corresponding area. To calculate BLUPs for germplasm, drag the germplasmName button to the Random factors area. To calculate BLUEs, drag it to the Fixed factors area. The factors need to have different levels contained within them, for example, if there is only one trial in the dataset, it cannot be used as one of the factors. Click on Run analysis and got to next step to run the mixed model and display the results. The result view contains two tabs, one with the raw data, either BLUPs or BLUEs, and the other the adjusted means from the raw data.
The results can be stored in the database as an analysis, by clicking the button provided on the top of the data. 25.14 Genomic Prediction of Cross Performance (GCPC) The GCPC tool is available at /tools/gcpc and a link is provided from the Analyze menu. The GCPC tool implements genomic prediction with additive and directional dominance in the linear mixed model to predict for cross performance. Before using the tool, first create a dataset using the Wizard containing the data that you would like to analyze. (The dataset should have genotyping_protocols). Second, create Selection Indices for your traits using Selection Index in Analyze Menu. To use the tool, Select the GCPC tool from the Analyze menu. Then, select the dataset with genotyping_protocols that you wish to analyze, click on Proceed to Factor Selection to load available factors that can be included in the model. Select the factors you wish to include in the model either as Fixed or Random. Click None for factors that you dont want to include in the model. Note that the germplasmName is factored as Random by default. The next step is to select the selection index for your traits on the dropdown menu. Once you are through, click Run GCPC to run the model. The output will be presented in form of a table with ID, Parent1, Parent2 and their cross prediction merit organized in descending order. The results will also have sex information based on whether the dataset has plant sexes available in the database. "],["404.html", "Page not found", " Page not found The page you requested cannot be found (perhaps it was moved or renamed). You may want to try searching to find the page's new location, or use the table of contents to find the page you are looking for. "]] +[["index.html", "User Manual of Breedbase Introduction", " User Manual of Breedbase Breedbase team 2025-01-23 Introduction Welcome to the Breedbase manual! Use the table of contents in the left sidebar to navigate to the topic of your choice. 
At any time you can select specific text in the manual to highlight or annotate it using Hypothesis. Open the Hypothesis sidebar on the right to view existing annotations. You may also use the widgets at the top of the screen to: - collapse the sidebar - search for a specfic topic - change the font size, font type, or the site theme - download the manual as a pdf Manual as a pdf can be download here also. Download This manual is intended for database users. If you are a developer looking for software implementation details, please visit the developer wiki instead: https://github.com/solgenomics/sgn/wiki "],["basic-website-usage.html", "Chapter 1 Basic Website Usage 1.1 Creating a User Account 1.2 Managing your Account 1.3 Menu Layout 1.4 Working with Lists 1.5 User Permissions", " Chapter 1 Basic Website Usage 1.1 Creating a User Account 1.1.1 Verifying first that you do not already have an account Before creating an account, please verify first that you dont already have an account. You can use Search menu to check if you already registered as a user. In the Search menu, selecting the People tab and search your name. If nothing is found, proceed with the instructions below. Otherwise, clicking the Login button. If you have forgotten your password, you can retrieve it by clicking the Forgot your password? link on the login page. 1.1.2 Creating a user account On the right of the toolbar, clicking on Login. It will take you to the login page. On the login page, clicking on the link sign up for an account. It will take you to the page below: Filling in all of the information, then clicking Create Account. After you submit the information, an email will be sent to the provided email address. Checking your email and clicking on the link to activate your account. 1.2 Managing your Account 1.2.1 Login To login, clicking the Login link in the toolbar on any page and enter your username and password. 
If you have forgotten your password, you can retrieve it by clicking the Forgot your password? link on the login page. 1.2.2 Editing Account Settings Account settings can be edited by clicking on the my profile link displayed as your user name, on the right of the toolbar. You must login, in order to access and change account settings. You can add personal information to your account using the View or update personal information link. To change your password, username, or your contact email, clicking on Update account information link. You must provide your old password before you can make any changes. 1.2.3 Changing Your Account Status: From User to Submitter After you create an account, your account has a user status. This account has limited privileges. Accounts with user status are able to: Change personal information Post comments on pages Post to the forum To upgrade your account status to submitter, contact the database curators using the contact link provided at the footer of each page. Submitter accounts can add data, such as new plots, accessions, phenotype data and images. 1.2.4 Submitting Feedback on an SGN Database We appreciate your feedback! Feel free to submit any questions or suggestions by using the Feedback link provided at the footer of each page. 1.3 Menu Layout SGN Database websites have a toolbar on the top of each page with a number of menus for convenient access of major functions. The menus, as pictured below, are search, manage, analyze, and maps. The toolbar also provides a quick search, a log in button, and a new user button. 1.3.1 Menu Options Search In the Search menu, the options are: Tab Description Wizard Search different accessions and plots by location, year, trial, and trait data. Can also be used to create lists of different types. Accession and plots Search accessions and plots using a variety of criteria Trials Search trials by name, description, breeding program, year, location, and trial type. 
Markers Search different markers Images Search images contained in the SGN database People Search database users Manage In the Manage menu, the options are: Tab Description Breeding Programs View, add and delete breeding programs Locations View, add and delete locations Accessions Manage and search different accessions Seedlots Manage and search different seedlots Crosses Create new crosses in the database Field Trials Manage field trials. Create trials using different field layouts. Genotyping Plates Manage genotyping plates. Create 96 or 384 well plates. Phenotyping Upload phenotyping files from the Tablet Field Book application Field Book App Manage the field book app data (download files to tablet) Barcodes Refers to the old barcode system, mainly historical Download Download information in the database based on lists Analyze Clicking on the Analyze link will give a full menu of all analysis functions In the Analyze menu, the options are: Tab Description Breeder Tools Breeder Home Access breeding functionalities. Lists important and helpful links. Barcode Tools Manage, create, and download barcodes. Also access barcode tools. Genomic Selection Can search for traits, start building a GS model, and predict values based on genotypes Sequence Analysis BLAST Sequence homology search Other Ontology Browser Browse all recorded ontologies 1.4 Working with Lists Lists are collections of identifiers that are stored in the database. Lists can be composed of accessions, plots, traits, locations, and trials. Lists are attached to the individual users account, and can only be created and seen by the user while logged in. SGN databases make heavy use of lists in a number of tools on the website. For example, trials are created using lists of accessions. 1.4.1 Creating lists Lists can be generated in various ways: One way to create a list is by clicking on the Lists link located on the toolbar. 
To create a new list, enter the name of your new list and then clicking on the New List button. The name of the list can be anything, but should be unique and should be something to help you easily identify. You can find the list that you entered on the Your Lists page. To add items to your list, click on the View icon to open List Contents page. On the List Contents page, enter items that you want to add to the list, then click on Add button. The page will be updated and will display your items in a table at the bottom of the page. It is possible to sort the list if you need. Select the type of items in your list. To verify that the items that you added to your list are already stored in the database and that you selected a correct type for the items, click on the Validate button. If those items are already in the database, a message will indicate that This list passed validation Note that a list cannot contain duplicate elements. If a duplicate item is entered, the list manager will inform the user that the element is already in the list and will not add it again. Another easy way to create a list is to use 2.1, which can be accessed from the Search menu. 1.4.2 Viewing and editing lists Lists can be viewed and edited using the Lists link on the toolbar. Clicking on the link will open a window that displays all of your lists, as well as an option to create new lists. This page shows all lists that have been created, including those created by using the Search Wizard. You can view and edit your lists by using Actions buttons. Clicking on the view icon will open a new window called List Contents that allows you to change the list name, the type of the list, add new items, or delete existing items. Clicking on the delete icon will delete your list. Caution: this action cannot be undone. Clicking on the download icon will download the contents of your list to your computer. 
Clicking on the make public icon will make your list available for other users to view and use your list. 1.5 User Permissions Breedbase accounts are assigned one or more of four different roles to determine the level of access they have within the database. The possible roles are User, Submitter, Sequencer, and Curator. Each role grants specific permissions, and careful management of them helps prevent data from being altered or deleted in error. Accounts are also assigned Breeding Program role(s) to grant access to the specfic breeding program(s) they work with. The User role gives an account permission to view and download data throughout the database. The Submitter role gives an account permission to design field experiments and to upload and edit data using the tools in the Manage section. In order to submit and manage breeding data within a given breeding program, a submitter also must have a matching Breeding Program role. The Sequencer role gives an account permission to design genotyping experiments and submit plates to a genotyping service. The Curator role gives an account permission to do all of the above, as well as to delete data within the database. The Curator role also enables the addition or deletion of roles for all database accounts in the Manage User Roles tool. "],["searching-the-database.html", "Chapter 2 Searching the Database 2.1 The Search Wizard 2.2 Accessions and Plot Search 2.3 Trials Search 2.4 Trait Search 2.5 Ontology Browser 2.6 Search Seedlots", " Chapter 2 Searching the Database You can search for information on the database by using the following search options: Wizard, which uses combined criteria specified by users; Accessions and Plots; Trials; Markers; Images; People; FAQ. 2.1 The Search Wizard 2.1.1 How the Search Wizard Works The search wizard presents a number of select boxes, which are initially empty. You start searching by picking a category of data from the dropdown above the left-most select box. 
Once a category has been picked, the database will retrieve all the options within this category and display them within the first select box. You then select one or more options from the first select box, which activates the second dropdown. You can then select a category from the second dropdown, and repeat this same search process through all four dropdowns and select boxes. In the example above, the locations category was chosen in the first dropdown. The first select box then displayed all the possible locations in the database. The option Ibadan was selected. This activated the second dropdown. The category years was chosen in the second dropdown. The second select box then displayed all the years that are linked in the database to the location Ibadan. From that list, the options 2011 and 2012 were selected. This activated the third dropdown. A final category, accessions, was chosen in the third dropdown. The third select box was then populated with the 3847 accessions in the database that are linked with the location Ibadan in the years 2011 or 2012. In addition to the basic search operations demonstrated above, users can take advantage of two more features: Load Selection from List Instead of picking a category in the first dropdown, users can instead populate the first selectbox from a list by scrolling down in the first dropdown to the Load Selection from List subheading and selecting a list. This is useful for starting queries with a list of plots, as this category is not among the options in the first dropdown. ANY/MIN/ALL Toggle By default, the search wizard combines options within a category using an OR query. 
In the example above, in the third panel the wizard retrieved accessions associated with the location Ibadan in ANY of the years 2011 OR 2012 If the user clicked the toggle below the second select box to change it to ALL before choosing accessions in the third dropdown, the wizard would instead retrieve accessions associated with the location Ibadan in the years 2011 AND 2012. This will be a smaller set of accessions, because any accessions used only in 2011, or only in 2012 will be excluded. A more advanced search could use the MIN toggle option. This allows the user to make a query in between an ANY or ALL query, where a minimum number of matches from the selected column will be used as a filter for the next column. The minimum can be provided as either a percentage (%) or an actual count of items (#). In the example above, if the years 2011, 2012, and 2013 were selected in the second column, the user could enter 2 in as the minimum and select # as the minimum match type. This would select accessions in the third column that were used in 2 or more of the selected years. 2.1.2 How to use retrieved data Getting more Info Any option in the wizard select boxes (except for years) can be clicked to open a page with more details. The new page is opened in a new tab. Saving to a list You can store the highlighted items in any selected box to lists. This is done using the inputs and buttons directly below the select box. Dont forget, you must be logged in to work with lists! To add items to an existing list, first pick an existing list using the Add to List dropdown on the left. Then click the Add button. A popup window will confirm the action, and display the number of items added to your existing list. To store items to a new list, first type a new list name in the Create New List text input on the left. Then click on the Create button. A popup window will confirm the action, and display the number of items added to your new list. 
Downloading Data You can download trial metadata, phenotypes and genotypes associated with the highlighted items in the wizard select boxes. This is done using the buttons in the download section at the bottom of the page. Dont forget, you must be logged in to download data! Metadata Trial metadata can be downloaded by selecting a subset of trials from the database or based on your search categories. To download, click on Related Trial Metadata, a dialog will appear. Select download format and click the Metadata button to complete your download. Phenotypes The phenotypes download is quite flexible, and can download a subset of all the trial data in the database based on whichever categories and options you currently have selected. Simply click on the Related Trial Phenotypes link, review the options, changing or adding any additional parameters you like, then click Download Phenotypes. Genotypes The genotype download is more stringent. It requires a minimum of one accession and one genotyping protocol to be selected in the wizard select boxes. The text box in the download section of the page will help track what has been selected. Once clicked, the Download Genotypes button will download a genotype file for the selected accessions. Saving the wizard selections As discussed above, the selections of the individual select boxes in the wizard can be saved separately to a list. The lists can be used as inputs in other tools on the site. However, sometimes creating a selection is quite time consuming and restoring the selections from four different lists would be cumbersome too. Therefore, the selections can be saved together in a dataset, and named for later retrieval. This is done in the section Load/Create Datasets that is below the first two wizard select boxes. To select an existing dataset, one uses the Load Dataset dropdown. A particular dataset can be chosen, and the Load button can be clicked to retrieve and display the dataset in the wizard. 
To create a new dataset using items that are selected in the wizard, one can enter the name of the new dataset in the Create New Dataset text box. Once the dataset has been given a name, clicking the Create button will save the new dataset. 2.1.3 Updating the Wizard The search wizard uses a copy of the database, or a cache, to return results quickly. If data appears to be missing, it usually means that the cache needs to be updated. Users with submitter privileges or above can do this using the Update Wizard button. One can also use the Refresh Lists button to update the available lists. This will take just a few seconds in small databases, but may take a few hours to complete in larger databases. 2.2 Accessions and Plot Search Accessions and their related materials (cross, plant, plot, population, tissue_sample, training population) can be searched by using Search Accessions and Plots page. On this page, accession is the default stock type; however, you can change stock type by selecting an option from the drop-down list. From this page you can construct detailed queries for stock types. For example, by using the Usage section, the Properties section, and the Phenotypes section you could search for accessions which were diploids used in a specific year and location and were also phenotyped for height. You can also search for accessions based on genetic properties, such as the location of an introgression on a specific chromosome. It is possible to query over any of the available properties, such as ploidy_level, country of origin, introgression_chromosome, etc. In the search result table it is possible to select any of the available properties to view. At the bottom of the accession search there is a phenotype graphical filtering tool. Here you can filter down accessions based on combinations of trait performance. The filtered down accessions are then able to be saved to a list. For information on adding Accessions please see the Managing Accessions help. 
For information on how field trial plots, plants, tissue samples, and subplots are added to the database, please see the Managing Field Trials help. 2.3 Trials Search Trials on the database can be searched based on trial name, description, breeding program, year, location, trial type, design, planting date, and harvest date. 2.4 Trait Search On the Trait Search page (menu item Search > Traits), traits in the database can be searched by ID, name, or description. Optionally, a starting list of traits can be selected to filter down results. Selecting traits in the results of the search allows one to add the selected results to a trait list, or create a new trait list from the selected results. 2.5 Ontology Browser A more advanced tool for searching for Traits is the ontology browser, available by clicking on Analyze and Ontology Browser. From here you can search ontologies and see the various classifications of terms in a tree display. The terms which appear in the Trait Search in 2.4 are only variable terms. The ontology browser shows these variables as different from their grouping terms by indicating VARIABLE_OF like in the following screenshot. 2.6 Search Seedlots Seedlots are different from Accessions in that they represent the physical seed being evaluated in an experiment. Seedlots have things like physical storage locations and seed quantities, which accessions do not. To search for available seedlots you go to Manage and then click Seed Lots. By clicking Search Seedlots, you can specify query information. The results from your search will be in the table below the search form. "],["managing-user-roles.html", "Chapter 3 Managing User Roles 3.1 What are User Roles? 3.2 The Manage User Roles page", " Chapter 3 Managing User Roles 3.1 What are User Roles? Every user account in Breedbase has one or more associated roles that determine the authorizations (what the user is allowed to do) in the database. 
There are three fundamental roles, curator, submitter, and user, which determine basic read/write levels. The curator status can read and write everything in the database. The submitter status can add information and edit or delete previously submitted information. The user type can only read data. Additional roles represent the breeding programs, and are sometimes used to fine-tune write and edit capabilities, as it is necessary for multiple users in a breeding program to edit each other's data. 3.2 The Manage User Roles page In the Manage menu, select the item User Roles. This will show the current users in the database with their associated roles. If you are logged in as a curator, the table will show system roles as well as breeding program roles; if you are logged in as a submitter or user, it will show breeding program membership. If logged in as a curator, the roles can be added or deleted. To delete a role, click on the X in the role name. A confirm dialog will be displayed to prevent accidental deletion. To add a role, click on the plus sign next to the roles. A dialog will pop up with a list of roles. Select the desired role and click Submit. The new role should be displayed next to the user immediately. Role deletions and additions will be effective immediately. It is recommended that few users be given the curator privileges to avoid confusion over data ownership and accidental data overwriting and deletion. @ref(managing_user_roles) "],["managing-breeding-programs.html", "Chapter 4 Managing Breeding Programs", " Chapter 4 Managing Breeding Programs New breeding programs can be added by using Add New Program button on the Manage Breeding Programs page. Clicking on the Add New Program button will generate a blank form for you to fill out the name and description of the breeding program that you want to add. After completing the form, click on Add Breeding Program button to finish the process. 
"],["managing-locations.html", "Chapter 5 Managing Locations", " Chapter 5 Managing Locations Field locations can be managed using the Manage Locations page. On this page, locations in the database are organized based on their breeding programs. Each location has a link to trials conducted in that location. To add a new location, click on the Add Location button that links to the Add New Location form. On the Add New Location form, fill out the location name that you want to add. Latitude, longitude, and altitude are optional. Submit the new location by clicking on the Add Location button at the bottom right of the form. "],["managing-accessions.html", "Chapter 6 Managing Accessions 6.1 Add Accessions Using A List 6.2 Uploading Accessions and Accessions Info From A File 6.3 Email alert for accession upload 6.4 Add Parentage (Pedigree) Information to Accessions 6.5 Working with grafts 6.6 Bulk renaming of accessions", " Chapter 6 Managing Accessions The Manage Accession page provides links for adding new accessions. You can choose to add accessions into the database by either using a List you have created or by uploading XLS or XLSX file. Both options will be detailed below. To begin click on the Add Accessions or Upload Accession Info link. This will open a dialog allowing you to select either Using Lists or Uploading a File. 6.1 Add Accessions Using A List First we will show how to add accessions Using Lists. Here you select an accession list which you have previously made. If you need to create or edit your list you can do so now by clicking Manage Lists. Once you have selected your list you can click Continue. The first dialog which can appear will show the accessions which already exist in the database. Click Continue. The next dialog which can appear will show accessions which have very similar matches to the accession names you are adding. In the example below, there are two accession names that are very similar to accession names already in the database. 
TME0419 is very similar to TME419, and actually is probably a mistake that should not be added to the database. To avoid situations in adding a mistaken duplicate accession, the database gives you options for moving forward with these very similar looking accession names. You can either continue saving the name in your list, replace name in your list with selected existing name, remove name in your list and ignore, or add name in your list as a synonym to selected existing name. Clicking Download Fuzzy Matches will return a tabular result of the fuzzy accession name results shown. Click Make changes and continue to move on. The final dialog shows the accessions that will be added. Here you need to assign the species of these accessions. You can optionally group the accessions into a population and/or add an organization for the accessions. Once you click Add Accessions, the new accessions will be created in the database and you will see the following confirmation dialog, which includes links to the newly created accessions. 6.2 Uploading Accessions and Accessions Info From A File The process to upload accessions is very similar to using a list, but enables you to add a variety of properties, such as synonyms, to the accessions in bulk. Clicking on Spreadsheet format will show the following dialog. Here it shows that the file must be XLS or XLSX format and can contain a number of header columns as attributes. It is important that you use exactly the same header column names as listed here. In columns that indicate that many attribute values can be passed at once using (s), such as synonym(s), you can pass a comma separated list of values, such as synonym1,synonym2. Once you have selected your XLS or XLSX file for upload, click Continue. The following process is the same way as with lists: The first dialog which can appear will show accession names which are already in the database. 
Click Continue and the next dialog that can appear will show fuzzy matches for the accession names you are trying to upload. Here you can choose to prevent adding accession names which look very similar to each other as wrongly duplicated accessions. Click Continue and the final dialog that will appear will show the information to be added into the database. Here it is divided into accession names that are new and accession names that already exist in the database; however, for the accession names that already exist it will show additional attributes that originated from your file that will be added to these accessions. Once you click Add Accessions, the new accessions and information will be created in the database and you will see the following confirmation dialog, which includes links to the created and updated accessions. 6.3 Email alert for accession upload When uploading accessions from a file, you have the option to receive email notifications about the status and results of your upload by clicking the Email Alert checkbox. By default, the system will use the email address associated with your account, but you have the option to enter a different email address if you prefer. After submitting, the upload process runs in the background, allowing you to continue using the interface without interruptions. Once the process completes, you will receive an email with the upload results, including any warnings or errors that may have occurred during the upload. 6.4 Add Parentage (Pedigree) Information to Accessions Pedigree data can be uploaded from your computer by clicking on Upload Pedigree File IMPORTANT! Please use only tab-delimited text file format (.xls or .xlsx formats are NOT supported). 
You can find detailed information on how to prepare a pedigree file by clicking on File format information The currently supported format has four tab separated columns: progeny name female parent accession male parent accession type Type can be biparental, self, backcross, sib, polycross, reselected, or open. In the case of the open type, the male parent accession field can remain blank. For all other types, both columns should be filled, even if they contain the same information as another column (such as self). 6.5 Working with grafts Grafts are plants that are composed of a rootstock and a scion, which are genetically different and fused together, usually at the stem level. To work with grafts, the grafts interface needs to be activated by adding a configuration parameter in the sgn_local.conf file. The parameter is show_grafting_interface. It should be set to 1 in sgn_local.conf, the default is 0 (in sgn.conf). Grafts to be created need to be specified using an Excel file (xlsx format) with two columns. The first column should have the header scion accession and should list accession names that will be scions. The second column should have the header rootstock accession and should list accession names that will be rootstocks. In the database, the graft accessions will be created as single accessions. The graft accession will have two relationships, one to the scion accession (scion_of relationship) and one to the rootstock (rootstock_of relationship). These relationships are displayed on the pedigree viewer. The graft accession name is created from the scion accession name and the rootstock accession name, separated by the graft separator character. By default, the graft separator character is the plus sign +. The graft separator character can be changed in the sgn_local.conf file, using the parameter graft_separator_string. The graft separator string should not occur in any other accession names that are not grafts. 
When the grafting interface is activated, a new button will be shown on the manage accessions page, called Upload Grafts. Clicking the button brings up the upload grafts dialog. Select the Excel file containing the grafting information. The system will validate the file, for example, check whether the accessions are in the database, and if the headers are correct. The validation result will be presented, and if problems are found, they will be listed. In addition, if there are problems, the Upload button will be grayed out and upload will not be possible. Conversely, if there are no problems, the Upload button will be activated and can be clicked to upload the data. If the upload completes, a completion message is displayed with a summary what was uploaded. Grafted accessions can be used like any other accession, for example, they can be used on field layouts. If you create a list of graft accessions, use the list type accessions. Note that you shouldnt create new grafts based on other grafts. The scion accession and the rootstock accession have to be different, otherwise they will not be created. 6.6 Bulk renaming of accessions Accessions can be renamed in bulk using the rename accessions feature. To rename accessions, prepare a tab delimited file with two columns: the first column should have the header old name and contain the accession names that need to be changed. The second column should have the header new name and contain the names that the accessions in column 1 should be renamed to. The accession renaming feature is available from the Manage->Accessions page. Click on the Rename Accessions button. The first step is the upload of the file with a verification step. The verification step checks whether all the accession names in column 1 exist in the database, and whether all the accession names given in column 2 do NOT exist in the database. 
Only if both conditions are met, will the rename button become active, otherwise an error message is displayed listing the offending accession names. Optionally, the old name can be automatically added as a synonym to the renamed accession, using the checkbox on the submit form. This option is clicked by default. Unclick the checkbox to NOT save any old names as synonyms. Note that accession renaming should not be undertaken lightly. This feature is intended for special use cases, such as where accessions are created in a nursery with a name that is different from the accession name in the downstream breeding program. It can also be used to rename accessions in bulk that have spelling mistakes and other issues. Please note however, that the tool does not make any attempt to change the names of associated elements, such as plots, that may have been constructed using accession names. Because of the many implications of accession renaming, the feature is limited to accounts with the curator role. "],["managing-seed-lots.html", "Chapter 7 Managing Seed Lots 7.1 Add New Seedlot(s) 7.2 Seedlot Transactions 7.3 Seed Inventory 7.4 Find Seedlots For a List of Accessions 7.5 Create a seedlot for an Accession or Cross 7.6 Add quality data to a seedlot 7.7 Seedlot Maintenance Events 7.8 Deleting Seedlots", " Chapter 7 Managing Seed Lots Seedlots are different from Accessions in that they represent the physical seed being evaluated in an experiment. Seedlots have things like physical storage locations and seed quantities, which accessions do not. The seed in seedlots can be from crosses or can be named accessions. Seedlots from crosses would represent seed harvested. Click Manage and then Seed Lots to begin. 7.1 Add New Seedlot(s) To add a single new seedlot, click on Add Seedlot. This will bring up the following dialog where you enter information about where the seedlot exists, what accession or cross is contained in it, and how many seeds there are. 
A seedlot must contain either an accession or a cross, and not both. A seedlot must have a weight in grams or a seed count or both of these. In the case where you have many seedlots to add to the database, you can upload an excel XLS or XLSX file instead. Click Upload Seedlots to see the following dialog. 7.2 Seedlot Transactions Seedlots are capable of tracking where seeds came from, such as from crosses, and to where seeds go, such as to plots in the field. If you navigate to a seedlot detail page you will see the following. On this page you see and can edit information regarding a single seedlot, such as its name and location. You will also see a table indicating all the transactions that a seedlot has been involved in, such as if it was planted in a plot in the field. Transactions to field plots are created when adding or uploading a new trial or from a trial's detail page. Clicking on Add New Transaction lets you add a transaction between this seedlot and another seedlot. This kind of transaction is useful for representing if you have distributed seed to different locations. 7.3 Seed Inventory To inventory your seed: 1) Make sure your seedlots are in the database. Use Add New Seedlot to add a single seedlot or Upload New Seedlots to add many. 2) Make sure your seedlots are barcoded. You can print these barcodes from the database. 3) Use the Inventory Android Application to scan seedlot barcodes and record weight. Then use Upload Inventory to upload this info into database. If you prefer you can create your own CSV file and upload that, if you do not want to use the Inventory Application. For more info about the Inventory Android Application go to Inventory. Clicking the Upload Inventory button will bring the following dialog: The CSV file that should contain your inventory should meet these Template requirements. The Seed Inventory Android Application exports this exact file. 
7.4 Find Seedlots For a List of Accessions A convenient tool for searching available seedlots for a list of accessions is available in the list tool. First open up your list of accessions. For help opening a list of accessions please see the List section help. There is a button called See Available Seedlots. Once you click this, you will see the following table in a dialog. From here you can create a list of seedlots using the checkboxes and the input at the bottom. 7.5 Create a seedlot for an Accession or Cross Complementary to what we saw above for creating seedlots from the Manage Seedlots page, it is possible to create a new seedlot from an accession's detail page or from the cross detail page. On the accession detail page, this is visible in the Related Stocks section as seen below. The cross detail page has an identical section. Notice the link for creating a new seedlot, which streamlines adding the seedlot. 7.6 Add quality data to a seedlot Quality information can be added to a seedlot in the quality field. This is also available as a column in the file upload format. It is recommended to use a controlled vocabulary, defined by the user, for the quality field. For example, good quality seed should be labelled ok, whereas other quality descriptors could be moldy, insect damage, or low sprouting, etc. 7.7 Seedlot Maintenance Events For some crops, such as sugar kelp, a seedlot requires routine maintenance for the successful long-term storage of the seedlot. (For example, a Seedlot Maintenance Event for sugar kelp would be the routine change of the water that gametophytes are kept in). Breedbase can now store a record of these Seedlot Maintenance Events associated directly with existing Seedlots. Maintenance Events can be uploaded using a simple Excel template or recorded directly on the website. 
7.7.1 Setup Each Breedbase instance needs to be configured to support the storage of Seedlot Maintenance Events since each crop will have their own distinct set of maintenance events for their seedlots. To check if your Breedbase instance supports this feature, go to the Manage menu and select the Seed Lots page. Make sure you are logged in and look for the Seedlot Maintenance button near the top, next to the Create Seedlot(s) and Upload Inventory buttons. If you don't see this button, contact the developer(s) supporting your Breedbase instance and ask if they can set up this feature. The location of the Seedlot Maintenance button on the Manage > Seed Lots page 7.7.2 Adding Events Seedlot Maintenance Events can be added using two methods: 1) Uploading an Excel template or 2) Recording events directly on the website Uploading Events with Excel Template To bulk-upload a file of Seedlot Maintenance Events, first create an Excel (.xls or .xlsx) file with the following headers: seedlot - the name of the Seedlot to associate the event with (must exactly match an existing Seedlot in the database) type - the name of the Seedlot Maintenance Event type (these vary between Breedbase instances, a list of supported event types is displayed on the upload page) value - the value of the Seedlot Maintenance Event (these may be different for each event type and vary between Breedbase instances, a list of supported event values is displayed on the upload page) notes - optional, additional notes/comments about the event operator - the username of the Breedbase user that recorded the event timestamp - the date/time the event was recorded, in YYYY-MM-DD HH:MM:SS format Once you have an Excel file with the events filled out, follow these steps to upload the events to the database: Make sure you are logged in to your Breedbase instance Go to the Manage > Seed Lots page Select the Seedlot Maintenance button Select the Upload Maintenance button Choose your Excel (.xls or .xlsx) file to upload 
Select the Upload button The Seedlot Maintenance upload dialog, showing the supported event types and values (for sugar kelp) Recording Events on Website To add individual Seedlot Maintenance Events to the database in real time, as they're being recorded, use the Record Maintenance page. Follow these steps to record Seedlot Maintenance Events: Make sure you are logged in to your Breedbase instance Go to the Manage > Seed Lots page Select the Seedlot Maintenance button Select the Record Maintenance button Enter the Seedlot Name or scan a barcode that has the Seedlot Name encoded. Once entered, the box at the top of the page will display basic information about the Seedlot as well as its recently recorded events. Select or Enter the values of individual events Optionally, select the notes button next to each event to add additional notes/comments about that specific event Make sure the operator/username and timestamp are correct Select the Submit button to add the recorded events to the database. NOTE: any events that remain selected as Not Recorded will not be submitted to the database. The Seedlot Maintenance record page, as configured for sugar kelp 7.7.3 Displaying Events Recently recorded Seedlot Maintenance Events are displayed in a table from the main Seedlot Maintenance page, as well as the detail page for individual Seedlots. Unfiltered table of recent Seedlot Maintenance events The events displayed in these tables are sorted by timestamp, with the most recently recorded events displayed first. The displayed events can be filtered using any number of supported filter criteria, such as: - seedlot names (as entered on the page or using an existing seedlot list), - dates (on, on or before, before, on or after, and/or after the entered dates) - event types - event type values - operator/username Select the properties of the filter(s) you want to apply, then select the Add button next to the button to add the filter to the list of applied filters. 
Once you're done adding filters, select the Filter button to search the database for the filtered events. A filtered table of Seedlot Maintenance events The filtered events can be downloaded directly from the table using the Excel or CSV buttons at the top of the table. Or Seedlot Maintenance Events can be bulk-downloaded (this includes all events for a Seedlot) using a list of Seedlots from the main downloads page (see below). 7.7.4 Downloading Events To bulk-download all events for a specific subset of Seedlots: Create a list containing the Seedlots you are interested in. Go to the Download Using Lists page (Manage > Download) Find the Download Seedlot Maintenance Events section Select your list of Seedlots Select the Download button to generate the download file The downloaded file will follow the same format as the upload template and will contain all recorded Seedlot Maintenance Events for each Seedlot in the list. 7.8 Deleting Seedlots Seedlots can be deleted on the Manage Seedlots page (/breeders/seedlots) by searching for the seedlot and then clicking the X to delete one seedlot at a time. To delete a seedlot, the logged in user needs the required delete privileges on the seedlot. The seedlot also should not have any transactions associated with it (except for the initial transaction). To delete seedlots in bulk, generate a list of type seedlot, for example, using the wizard. Open the section Delete seedlots using a list on the Manage Seedlots page and select the list. Seedlot deletion using a list is only available to users with curator status. "],["managing-populations.html", "Chapter 8 Managing Populations", " Chapter 8 Managing Populations Populations are modeled as groups of accessions. This grouping can be useful in downstream analyses. To manage these populations go to Manage Accessions and scroll to the bottom. To add a new population click Create Population. 
The following dialog will appear where you choose a list of accessions and give a name to the new population. Please note it is also possible to create a population when you are uploading new accessions into the database. Click on the plus (+) button next to Populations to see all the available populations. Click on a population name to see the accessions in the population. From here you can delete accessions from a population as well as add new accessions to the population. "],["managing-crosses.html", "Chapter 9 Managing Crosses 9.1 Crossing Experiment 9.2 Cross 9.3 Cross Wishlist 9.4 Crossing Experiment Detail Page 9.5 Cross Detail Page", " Chapter 9 Managing Crosses Information for crosses can be managed using the Crosses option in the Manage menu. 9.1 Crossing Experiment Different crosses in the same trial/nursery/project are grouped via crossing experiment. Crossing experiments are organized based on their breeding programs. To find a crossing experiment, you can either type the crossing experiment name in the Search box, or look for the crossing experiment directly in its breeding program by clicking on the + icon. In each breeding program, crossing experiments can be placed directly in the breeding program, or organized in folders. The Folders section allows you to place crossing experiments in folders, move a crossing experiment in a folder to another folder, or rearrange your folders within a breeding program. 9.1.1 Add New Crossing Experiment To add a new crossing experiment, click on Add Crossing Experiment link. Required Information: Crossing Experiment Name: enter a name for the crossing experiment. The crossing experiment name must not already exist in the database. Breeding program: select a breeding program that is available in the database. New breeding programs can be added on the Breeding program page, accessible from the Manage menu. Breeding Program Page Location: select a location for the crossing experiment. 
New locations can be entered on the Locations page, accessible from the Manage menu. Location Page Year: select a year. Description: enter a description for the crossing experiment. After filling in the information, click Submit to generate the crossing experiment. 9.2 Cross 9.2.1 Add New Crosses Add a cross by using the Add New Cross dialog To add a single new cross, click on Add Cross link. Enter cross information in the popup dialog. Required Information: Crossing experiment: select a crossing experiment available in the database. Location: select a location available in the database. Cross name: enter a name for the cross. The cross name must not already exist in the database. Cross type: the options for cross types are: biparental, self, open pollinated, bulk, bulk selfed, bulk and open pollinated, double haploid, polycross, reciprocal and multicross. The Female Parent and Male Parent field are auto-complete fields for accessions that are already in the database. The parents specified will be entered in the pedigree of the new accessions generated by this cross. Optional Information: Female Plot and/or Male Plot: In addition to the accession names, specific plots used in the cross can also be added to the database. To retrieve plot names associated with each female/male accession, enter your trial name, then click Search Plots. Plot names of each parental accession in that field trial will be shown in the drop-down list, you can then select the plot used in the cross. Additional crossing experimental information such as pollination date, number of flowers, number of fruits, number of seeds can be specified during adding new cross. Alternatively, this information can be updated or edited directly on the Cross Details page. If you know the number of accessions that are generated from the cross, they can be instantiated immediately in the database by clicking the Add accessions for progeny checkbox and specifying the number. Click Submit to generate the cross. 
Upload New Crosses To upload new crosses from an Excel file (.xls or .xlsx), click on Upload Crosses link. Select a crossing experiment and a location available in the database from drop-down lists and choose a file that you want to upload, then click Upload File. Please check spreadsheet format carefully. The file must be an Excel file (.xls or .xlsx). 9.2.2 Update Crosses by Uploading To upload progenies and/or experimental info of crosses already in the database, go to Manage-Upload page. In the Crosses section, there are links for uploading progenies and experimental info. Please check spreadsheet format in each link carefully. The file must be an Excel file (.xls or .xlsx). Note: crossing experimental information is customized based on the need for each crop. As a result, column headers for experimental info in your database may be different from the information shown in this manual. 9.3 Cross Wishlist An Android ODK application is being developed to record cross information on a mobile device in the field. To link this mobile application with the database, the Cross Wishlist can be used to create a plan for which crosses to perform. This tool is available on the Manage Cross page. It is currently only available on certain databases, so when you click this link you may see an alert mentioning that the cross wishlist is not available on your database. 9.3.1 Create a Cross Wishlist Step 1. Select the accessions to be crossed in your trial There are two interfaces for this step, either Not Using Lists or Using Lists. Depending on if you already have a list of female and male accessions to use, you can decide on which interface to use. The end result of using either interface is the same. We will start by showing Not Using Lists. First select the trial in which the crosses are to be performed. This will populate a select box with all the accessions used in that trial. From here, one or many accessions can be selected as the female accession. 
Once the female accessions are selected, a table is populated. Each row in this table begins with the female accession that was selected, followed by a select box with all the accessions used in the trial. From here, one or many accessions can be selected as the male to use in the cross. Once the male accessions are selected to cross with each female accession, a table indicating priorities appears. Priority is meant to indicate an order in which to attempt the cross; first the highest priority male will be considered, but if this cross is not possible then subsequent males will be considered. An equal priority can be given and this will not indicate a specific order to follow. Alternatively, we could have used the Using List interface instead. Here we select the trial in which the crosses will be performed and we provide a list of accessions to consider for the females and the males to be crossed. Step 2. Select the female plots to be considered in the crosses After selecting your lists, the table below is populated. The first column has all the female accessions specified and the header row has all the male accessions specified. The males to consider crossing with each female are indicated with priority. After female and male accessions are selected to cross, either by the Not Using Lists or Using List interface, click Next. The next dialog will allow selection of specific female plots to use for the cross. Sections for each female accession selected will appear with the field layout displayed. Selecting all plots in which the female is present indicates that the cross should be performed on all plots where that female accession is present. Step 3. Transfer the cross wishlist to your mobile crossing application Clicking Push Cross Wishlst for ODK Use will send the cross wishlist plan to the ONA server for use by the mobile ODK application. Crosses can then be performed and recorded in the field using the mobile application. 
Afterwards, the crosses are sent back to our database and stored. 9.4 Crossing Experiment Detail Page Information for crosses in the same crossing experiment is compiled in the crossing experiment detail page. Each cross name, female parent, male parent, female plot and male plot has a link to its own detail page, which contains information specific to each one. Note: crossing experimental information is customized based on the need for each crop. As a result, the details of the information in your database may be different from the information shown in this manual. 9.5 Cross Detail Page Information of each cross can also be viewed in its detail page. This page allows you to update or edit crossing experimental information and add progenies related to that cross. Note: crossing experimental information is customized based on the need for each crop. As a result, the details of the information in your database may be different from the information shown in this manual. "],["managing-field-trials.html", "Chapter 10 Managing Field Trials 10.1 Trial Detail Page 10.2 Adding Trials 10.3 Updating Trial Data 10.4 Deleting Trial Data", " Chapter 10 Managing Field Trials To view trial details on the database, click on the Field Trials link under the manage menu on the toolbar. Clicking on the Field Trials link will bring you to the Manage Trials page. On this page, trials are organized according to their breeding programs. To access trial details, click on the + icon next to your breeding program. Trials can be placed directly in their breeding program. Alternatively, they can be organized by using folders within each breeding program. Clicking on trial name will take you directly to the trial details page. 10.1 Trial Detail Page Trial detail page displays important information about individual trial including breeding program, location, year, description of the trial, design, and any files associated with that trial. 
The Navigator section on the trial detail page allows easy access to all aspects of your trial. This section contains subsections for printing labels for your plots or plants, recording phenotypes, viewing your trial layout or design, viewing phenotypes for this trial, or conducting analyses. The transplanting date field feature will only be shown if it has a value. To add a transplanting date after creating a trial, change the show_transplanting_date parameter from 0 to 1 in the SGN config file. As a result, you will be able to add a date under the transplanting date field by clicking the Edit Trial Details on the trial detail page. 10.2 Adding Trials Only users with the account status of submitter may create trials. To learn how to change your account status from user to submitter visit the 1.2 page. 10.2.1 Prerequisites To add a trial, all of your accessions should already exist in the database before you begin to design a trial. If you have accessions that are not in the database, see the instructions for adding accessions . Breeding program and location for your trial should also exist in the database. If you need to add breeding program and/or location to the database, see instructions for adding breeding program and location in the Managing Breeding Programs and Managing locations respectively. On the Manage Trials page, there are three alternative methods for you to add new trials: by using Add Trial form, Upload Trial form, or Add Multi-location Trial form. 10.2.2 Adding a trial by using Add Trial form Step 1. Begin the Design new trial workflow Click on Design New Trial to begin. The first step in this workflow is an introduction that looks like: Here it gives information about what is required for a trial, including that to create a new trial, you need to create a list of the accessions that you would like to use in the trial. Lists can be viewed, created, and modified with the lists tool at the upper right of the screen. 
For more information on lists, click here. Step 2. Enter Trial Information On this screen you need to enter basic information about the trial, such as breeding program and location(s). You must also select a design type, such as Complete Block Design. The design is important because it influences how your genotypes are distributed and randomized over the trial. You must first click validate before proceeding to the next step. Step 3. Enter Design Information On this screen you need to specify a list of accessions to use in the experiment. This list must be a valid list of accessions. You must also specify all required design information, such as number of blocks in this case. Step 4. Enter Field Map Information (Optional) On this screen you can specify how the row and column numbers will be generated for the plots in the trial. The row and column number represent a relative position of the plot in the field. If you are not exactly sure of how you will plant the plots in the field or you have an irregular (non-rectangular) layout, you can skip this step for now. This information can be added on the Trial Detail Page once the trial is saved in the database in order to reflect exactly how the plots were planted in the field. Step 5. Custom Plot Naming (Optional) On this screen it is possible to change the format in which plot names will be generated for your trial. It is recommended to skip this step and just use the format generated by the database by default. Step 6. Review Designed Trial On this screen you can review the trial that the database has generated. You will see a graphical representation of the trial. The numbers on the squares represent the plot_number of each plot and on mouse hover you can see further information about the plot. You will also see a table representation of all the plots and their information. If you want to redo the randomization, you can click the Redo Randomization button. 
At the bottom there is a brief summary of the trial followed by two buttons. Step 7. Add Field Management Factors to your design (Optional) You can add Field Management Factors by clicking Add Field Management Factor(s) to Design. Clicking this opens a dialog to name your factor. You can name this to account for fertilizer or watering regime or inoculation or anything else. This is optional and can be added from the trial detail page afterwards. Click Continue and a dialog will appear where you can specify plots for which the factor was applied. There is a select all button also. Step 8. Saving new trial in the database Once you are done reviewing the trial you can click Confirm to save the generated trial into the database. Once the trial has saved you will see the final completion screen: 10.2.3 Adding a trial from an uploaded file If you already have trial design layout in a spreadsheet, you can add your trial into the database by using Upload Trial form. To access Upload Trial form, click on Upload Existing Trial(s) button on the Manage Trials page. When you click Upload Existing Trial(s) you will see the following workflow. Notice that there are 5 numbered sections to the workflow. Step 1: The first step is to understand what the format of the trial upload is. It is important to understand that the field layout represents plots in the experiment. Each plot has a globally unique plot_name, a sequential plot_number that is unique in the trial (but not globally unique, e.g. 101, 102, 103 for three separate plots), an accession_name representing what genotype is planted in that plot, and a block_number representing design replication. Each plot can be thought of as having a row_number and a column_number representing the relative position of the plot in a grid (e.g. the top left plot is row 1 column 1 followed by row 1 column 2). 
Each plot can be planted with an amount of seed from a seedlot, where the seedlot_name represents the specific seed packet that was used, and num_seed_per_plot and weight_gram_seed_per_plot represent amount that were transferred from the seedlot_name to the plot_name. Treatments (management factors) can be applied onto plots using additional column names in your file, where a 1 represents if the factor was applied to the plot and an empty cell means it was not applied. This information and more can be found by clicking Information about file format, which shows the following: Minimum File requirements All accession names in the file must exist in the database. See adding accessions for more information. The uploaded file should be XLS or XLSX file format (NOT CSV). The first row (header) must contain the column names: plot_name accession_name plot_number block_number is_a_control rep_number range_number row_number col_number seedlot_name num_seed_per_plot weight_gram_seed_per_plot Minimal Example: plot_name accession_name plot_number block_number is_a_control rep_number range_number row_number col_number seedlot_name num_seed_per_plot weight_gram_seed_per_plot 2018plot1 my_accession1 101 1 1 2018plot2 my_accession2 201 2 2018plot3 my_accession2 102 1 2018plot4 my_accession1 202 2 1 File validation In case of errors in the uploaded file such as missing or invalid data, a window will appear listing the specific errors in the file that must be corrected before a successful upload. Uploading a trial with Field Management Factors You can upload a trial with field management factor(s) by adding additional column(s). The column header will be the factor e.g.fertilizer, watering regime, inoculation, etc. and the values in these columns will be either 1 or empty, indicating that the factor was applied to the plot or not. Step 2: Once you feel that your experiment field layout is in the right format, click on to the Next Step. 
You will see the following form which must be filled in completely: The trial name must be globally unique in the database. Please try to follow standard naming conventions for your group. First you need to validate the form, and then you can click Upload Trial. Step 3: In the case where you have uploaded an experiment using accession_names that are not already present in the database, you will be taken to this screen. If the accession_names in your file are all already in the database, this step will be skipped. The reason it is necessary for your accessions to be in the database before you can add a trial using them is that a single accession can be used among many trials and therefore must exist as a separate entity in the database; because of this it is also very important to be careful about adding wrongly duplicated accession_names into the database. From this screen it is possible to make a new list with the missing accession_names and then click Add Accessions to the database to immediately resolve the issue. Once all your accessions are in the database, click to move to the Next Step. Step 4: In the case where you have uploaded an experiment using seedlot_names that are not already present in the database, you will be taken to this screen. If the seedlots in your file are all already in the database, this step will be skipped. The reason it is necessary for your seedlots to be in the database before you can add a trial using them is that a single seedlot can be used among many trials and therefore must exist as a separate entity in the database. From this screen it is possible to add the missing seedlots; you can either upload an XLS or XLSX file to add many at once or you can add them one by one. Once all your seedlots are in the database, click to move to the Next Step. 
Step 5: If there are any other errors with your file, such as if the plot_names are not globally unique in the database or your plot_numbers are not unique in your trial or row_number is not an integer or any other error, you will see the errors listed in the red box. It is up to you to correct these errors in your file. Simply open up the file you selected earlier in Excel and correct the issues and then save the file. Then you can click Submit Trial and it will resubmit it for you. You can continue to edit your file here and submit as many times as you need until it is accepted. Completion screen Whether you were lucky enough to submit your trial successfully on Step 2 or if you tried many times on Step 5, once your trial has been saved in the database you will see the following screen: 10.2.4 Multi-location trials To add multi-location trials, simply select the multiple locations while using the Add Trial form. This will create a separate trial for each selected location, but they will share the same design and will be grouped in a single folder. By default each trial design will have a fresh randomization, but if desired you may check the Use same randomization for all locations option. 10.2.5 Email alert for multiple trial design upload When uploading multiple trials from a file, you have the option to receive email notifications by clicking the Email Alert checkbox. By default, the system will use the email address associated with your account, but you have the option to enter a different email address if you prefer. After submitting, the upload process runs in the background, allowing you to continue using the interface without interruptions. Once the process completes, you will receive an email with the upload results. 10.2.6 Viewing Plot Layout and Trait HeatMap 10.2.6.1 Viewing plot layout In the Field Layout Tools and Phenotype Heatmap section of a Trial Detail page, the trial physical layout is displayed by default. 
The relative position of the plots will be displayed based on the row and column positions given to the plots during the trial creation or upload steps. The plots are color-coded based on the plot's rep and block numbers and whether or not it is used as a check. Hover the mouse over the plot to see details about a specific plot. If there is more than one trial grown in the same physical field, the trial layouts of all of the trials can be shown together if the trials share these properties: Each trial has the same year Each trial has the same location The location type of the trial's location is set to Field The row and column positions of all of the plots (across the related trials) don't overlap. For example, trial #1 starts at row 1 and trial #2 starts at row 10. When these conditions are met and you check the Select Trials in Same Field checkbox, the plots from all of the related trials will be displayed on the same field layout. The plots will be color-coded by trial. The planting order and harvest order downloads will include the plots from all of the displayed trials in the order in which the plots occur in the field. 10.2.6.2 Viewing plot layout for multiple trials Tracking plot images on fieldMap Plot images can be seen on fieldMap if a plot is associated to any image. To view plot image(s), click on a plot, a dialog will appear. On the appeared dialog, click on View plot images. To see more images if a plot has more than 2 images, click on See more images. Medium size of an image can be viewed by clicking on an image. Viewing assayed trait heatmap Phenotype heatmap can be viewed by selecting a specific assayed trait from the selectbox drop-down. Mousing over the plots highlights the plot in green and also displays the plot's field information including the selected trait's phenotype value. Suppressing Plot Phenotype Clicking on a plot on the heatmap would display a dialog that has a button for suppressing a plot phenotype value for a given trait. 
A suppressed plot value can be excluded during trial analysis and phenotype download. 10.2.7 Adding additional information in the Trial Detail page After you added a new trial to the database, you can edit trial details or add more information for that trial through the Trial Detail page. Uploading Physical Trial Layout You can upload physical trial layout by clicking on the Upload trial coordinates button on the Trial Detail page. Please check file format carefully. You can find file format information by clicking on the Spreadsheet format on the Upload trial coordinates window. Spreadsheet format: Physical Trial Layout File requirements All plot names in the file must exist in the database. The uploaded file should be tab delimited (txt). The first row (header) must contain the column names Example: plot_name row_number col_number plot1 1 1 plot2 1 2 plot3 1 3 Select the trial layout coordinates file that you want to upload for this trial, then click OK button to upload the file. The following message is displayed after the coordinates are uploaded. The field layout can be viewed by clicking on the Trial Heatmap Section to see a drop-down of the field map. Downloading Field Map Spreadsheet Field map spreadsheet can be downloaded if the trial has field coordinate (row and column numbers) uploaded for its plots. To download, click on the Download FieldMap Layout link on the Trial Heatmap section. A dialog will appear, click on the submit button to download. Click to view downloaded spreadsheet. Editing Physical Trial Layout Usage Help link contains information on how to edit physical trial layout. There are three different options for editing trial layout: Replacing plot accession by clicking on the plot in the layout. Replacing trial accession by using Edit Field Map link. Substituting plot accessions by using Edit Field Map link. When you move a cursor over a plot on the trial layout, information for that plot appears. 
To edit a specific plot, clicking on that plot. Entering new accession on the Replace Plot Accession form, then clicking on Replace Plot Accession button. To replace an accession (in every plot/plant of that accession), clicking on Edit Field Map button. On the Edit Field Map window, clicking on Replace Accession button. Selecting any accession that you want to replace and entering your new accession, then clicking Replace Trial Accession button. You can switch plot accessions between any two plots by clicking on Substitute Accession button. On the Substitute Plot Accession form, selecting the two plots that you want to switch, then clicking on the Substitute Plot Accession button. 10.2.8 Downloading the Trial Layout from the Trial Detail page Click on Download Layout on the Trial Detail page. The trial layout includes all information regarding the observation units in the experiment. The observation units can be plots, plants, or subplots. The trial layout can include trial design information such as the block_number and rep_number. It can also include physical map information such as the row_number and col_number, if that information is available for the trial. The trial layout also includes information regarding treatments that have been applied in the field. Optionally, the layout can give information regarding accessions global performance for a list of traits. 10.2.9 Adding Plant Entries To Your Trial After you added a new trial to the database you can choose to add plant entries to your trial. Adding plant entries enables plant level phenotyping. It is generally better to enter data at the plant level into the database because it is always possible to calculate plot level phenotypes from the individual plant data. Plant entries can be added to your trial in two ways: 1) Automatically generated by the database. The only input required is the number of plants per plot. 2) Uploaded in an XLS or XLSX file. This allows you to specifically name your plant entries. 
These two options are available in the Plant Entries section on the Trial Detail Page, as shown in the screen shot below. Automatically Generate Plant Entries Clicking on Add plant entries opens the following dialog box. The only input required is the number of plants per plot. This will create plant entries that are named as a concatenation of the plot_name and the plant's index number, e.g. plot_name_plant_1 Upload Plant Entries Alternatively, you can choose to upload an XLS or XLSX file that contains the names of the plant entries. Clicking on Upload plant entries opens the following dialog box. Clicking on Spreadsheet format will give you information about the XLS or XLSX file to upload. Clicking this will open the following dialog box. This shows you that the file requires the header to contain plot_name and plant_name. The plot_name must exist in the database already and the plant_name must be unique in the database. Along with the file, you must specify number of plants per plot. This is intended to be the total number of plants that were planted. If the file you upload shows three plants in one plot and four plants in another plot, that is fine. 10.2.10 Adding Tissue Sample Entries To Your Trial Some trials require tissue samples to be collected from plants in a field trial. The database will generate these tissue sample identifiers for you and will maintain all relationships with the plant, plot, accession, etc. To begin, go to the Design section of a trial's detail page and open the tissue sample entries section. Please note that tissue samples are directly related to plants, therefore your trial requires plants before you can add tissue samples. When you click on Add tissue sample entries you will see a dialog where you specify the number of tissue samples you require per plant. Once you have specified how many tissue samples, you can give specific words to distinguish samples, such as root or stem, as seen below. 
Once you have added tissue sample entries they will appear in the design section of the trial as seen below. Each tissue sample has a detail page where you can add information about the sample, such as if it is in transit or in storage somewhere. The related stocks section near the bottom of this detail page displays the relationships between all stocks, including tissue samples. 10.2.11 Uploading GPS Coordinates For Plots You can upload GPS coordinates for the plots in your trial. There is a link on the Trial Detail Page as shown below. Clicking on this link will bring up the following dialog. Here you can upload an XLS or XLSX file. To see information on the format of the file that should be uploaded, click on Spreadsheet format. This will bring up the following dialog. This dialog tells you that the file must be XLS or XLSX and must contain: plot_name WGS84_bottom_left_x WGS84_bottom_left_y WGS84_bottom_right_x WGS84_bottom_right_y WGS84_top_right_x WGS84_top_right_y WGS84_top_left_x WGS84_top_left_y The GPS coordinates should be WGS84 format and specify a four-pointed polygon around the plot. 10.2.12 Repetitive Measurements Section If a trial includes repetitive traits or time-series values, you can effectively view and analyze these values through the Repetitive Measurements Section. Start by selecting the desired trait from the trait drop-down menu. Next, define the date range by either using the date-range picker or an interactive slider, which allows you to dynamically adjust the period you wish to examine. Once the date range is set, determine how to handle the repetitive measurements by choosing from various options such as First Value, Last Value, Averaged Value, Sum Values, or All Values. Choosing the All Values option enables an additional feature that visualizes the trend of the values over time, helping you identify patterns and trends within the data. 
10.2.13 Uploading Additional Files To Trial It may be of interest to you to upload additional documents, images, or recordings to your trial. To do this, scroll down to the Uploaded Additional File section on the trial detail page. From here you can view and download any of these additional files. To upload an additional file, click on the Upload Additional Files link. A dialog will appear where you simply select your desired file. For information, you can click Upload information to see the following message. 10.3 Updating Trial Data To update the trial-level metadata (such as the planting date, design type, description, etc) of one or more existing trials, click the Update Existing Trial(s) button from the Manage > Field Trials page. This upload can also be used to rename trials or move trials to a different breeding program. In order to update a trial, you must be a curator or a submitter (that is associated with the breeding program of the trials). Here you can upload a file that contains the new metadata for the existing trials in the database. The first column is labeled trial_name and includes the name of the existing trial. Additional columns can be included for the metadata you want to update. Any columns not included in the file or values left blank will leave the existing metadata unchanged. The columns that can be included are: new_trial_name: A new name for the trial, must not already exist in the database breeding_program: The name of breeding program that managed the trial, must exist in the database. location: The name or abbreviation of the location where the trial was held, must exist in the database. year: The year the trial was held. transplanting_date: The date on which transplanting for the trial was conducted. 
Date in YYYY-MM-DD format or remove to remove the date planting_date: Date of Planting in YYYY-MM-DD format or remove to remove the date harvest_date: Date of Harvest in YYYY-MM-DD format or remove to remove the date design_type: The shorthand for the design type, must exist in the database. Possible values include CRD: Completely Randomized Design, RCBD: Randomized Complete Block Design, RRC: Resolvable Row-Column, DRRC: Doubly-Resolvable Row-Column, ARC: Augmented Row-Column, Alpha: Alpha Lattice Design, Lattice: Lattice Design, Augmented: Augmented Design, MAD: Modified Augmented Design, greenhouse: undesigned Nursery/Greenhouse, splitplot: Split Plot, p-rep: Partially Replicated, Westcott: Westcott Design description: Additional text with any other relevant information about the trial. trial_type: The name of the trial type, must exist in the database. Possible values include Seedling Nursery, phenotyping_trial, Advanced Yield Trial, Preliminary Yield Trial, Uniform Yield Trial, Variety Release Trial, Clonal Evaluation, genetic_gain_trial, storage_trial, heterosis_trial, health_status_trial, grafting_trial, Screen House, Seed Multiplication, crossing_block_trial, Specialty Trial plot_width: plot width in meters plot_length: plot length in meters field_size: field size in hectares 10.4 Deleting Trial Data To delete a trial data, click on the Delete trial data section. There are links to delete traits, layout and trial entry data. To delete assayed trait data, click on Delete trait data link. On the appeared dialog, confirm deletion by clicking on the Select Traits For Deletion button, then select one or more traits to delete from the trial. To delete trial layout data, click on the Delete layout data link. Confirm deletion on the appeared dialog. To Delete trial entry, click on Delete trial entry link. Confirm deletion on the appeared dialog. 
"],["managing-genotyping-plates.html", "Chapter 11 Managing Genotyping Plates 11.1 Adding a New Genotyping Plate 11.2 Genotyping Plate Detail Page", " Chapter 11 Managing Genotyping Plates Genotyping Plates represent the content of a genotyping plate sent to a genotyping facility (e.g.samples in specific wells). To streamline this process, it is possible to upload this information or let the database create a plate for you. Once the genotyping plate is saved in the database it is then possible to export the information directly to genotyping facilities that are BrAPI compliant. The genotyping facility can then provide status information to us via BrAPI. To begin go to Manage->Genotyping Plates. Here the genotyping plates are divided by Breeding Program. These sections can be expanded by clicking on one. 11.1 Adding a New Genotyping Plate To begin, click on Add Genotyping Plate. Notice that this form is split into three sections: Plate Information, Well Information, and Confirm. The first section is for defining information about the genotyping plate, such as a Plate identifier, plate format (96 well), etc. The second section is for defining the samples in the wells, such as sample names, sample concentrations, well position, etc. The final section is for Submitting the info. All fields in the Plate Information section are required. In the Well Information section you can choose to either 1) Upload an XLS or XLSX spreadsheet with your sample layout or 2) let the database create the sample layout. If you choose to upload an XLS or XLSX spreadsheet, the Spreadsheet Template info requires the following: In either case, the sample identifier is generally a concatenation of Plate name and well position, e.g.MyGenotypingTrial1_A01. In either case, you need to provide a source_observation_unit_name for each sample. This can be a tissue sample name, a plant name, a plot name, or an accession name; however, in any case, the identifier must already exist in the database. 
This allows us to link the sample in the well to specific field trial plots, or, plants, or tissue_samples. If you only know which accession is in the well, you can use the accession name. In the final Confirm section you can decide whether to submit this information to the genotyping facility you selected. This requires that the genotyping facility is BrAPI compliant to work. 11.2 Genotyping Plate Detail Page If you open a specific genotyping plate, it will take you to the detail page. Here you can see the Accessions used in the plate (if you created the trial and the source_observation_unit_names you used were plots, this will still work because we know the accession of the plot or plant or tissue sample). Further down you can see a graphical representation of your plate with well positions. This can be 96 well or 384 well depending on your plate format. "],["using-fieldbook-app.html", "Chapter 12 Using Field Book App 12.1 A typical workflow 12.2 Creating Field Layout Files for the Field Book App 12.3 Creating Trait Files for the Field Book App 12.4 Transferring Files from Your Computer to Android Tablet 12.5 Setting up Field Book App for data collection 12.6 Exporting Files from Field Book App 12.7 Uploading Phenotype Files to an SGN database", " Chapter 12 Using Field Book App SGN databases support the Android Field Book App for collecting phenotypic data in the field with tablet computers. The app is available here: https://play.google.com/store/apps/details?id=com.fieldbook.tracker The app can also be downloaded directly from the Google Play store. There is no charge for the app. Field Book App requires two files for collecting data: Field layout file and trait file. SGN databases can generate the field layout file and trait file, which can be downloaded onto your computer, then transferred to an Android tablet device. 
12.1 A typical workflow Creating a field layout file based on the design of field trial Creating a trait file from the list of traits Downloading the field layout file and trait file from the database to your computer Downloading the field layout file and trait file to the tablet (where the Field Book App is installed) Collecting phenotypes Exporting phenotypes from Field Book App to your computer Uploading the exported phenotype file from your computer to the database 12.2 Creating Field Layout Files for the Field Book App There are two alternative methods for creating Field Layout Files. Using Field Book Tools page Using Trial Detail page. 12.2.1 Creating Field Layout Files by using Field Book Tools page. To access Field Book Tools page, clicking on Field Book App in the Manage menu. On the Field Book Tools page, clicking on New On the Download Fieldbook window, selecting trial name and data level (plots or plants), then clicking on Submit button. A treatment can be selected, which allows you to record phenotypes based on treatment application. A list of traits can be selected, which provides a summary of an accessions global performance for those traits in the Fieldbook. If the field book layout file was successfully created, a pop-up window will indicate that the field book layout file was saved successfully. Clicking on the file name will immediately download the file onto your computer. The file is also available to download on the Field Book Tools page, if you need to re-download it. To download field layout file to your computer, clicking on Download File, the file can then be transferred to your tablet. If you no longer want to keep the field layout file, clicking on Delete Layout File. 12.2.2 Creating Field Layout Files by using Trial Detail page. To create Field Layout Files, go to the Trial Detail page of the trial that you want to create the file. 
On the Trial Detail page, scrolling down to the bottom of the page to find Android Field Book Layout in the Files section, then clicking on the Create Field Book link. Clicking on the Create Field Book link will open a new window showing the name of the trial that you selected, as well as data level (plots or plants). A treatment can be selected, which allows you to record phenotypes based on treatment application. A list of traits can be selected, which provides a summary of an accessions global performance for those traits in the Fieldbook. To proceed, clicking on Submit button. If the field book layout file was successfully created, a pop-up window will indicate that the field book layout file was saved successfully. Clicking on the file name will immediately download the file onto your computer. The file is also available to download on the Field Book Tools page, if you need to re-download it. To download field layout file to your computer, clicking on Download File, the file can then be transferred to your tablet. If you no longer want to keep the field layout file, clicking on Delete Layout File. 12.3 Creating Trait Files for the Field Book App Steps to Create a Trait File: 12.3.1 Creating a Trait List After you logged in, lists can be created and managed using the Search Wizard or the Lists link. For more information on how to create lists, click here. 12.3.2 Creating a Trait File After you have your trait list, clicking on the Field Book App link found under the Manage menu tab. This will take you to the Field Book Tools page. To create a new trait file, finding the heading Trait Files, then clicking on the New link. Clicking on the New link will open a dialogue box titled Create Trait File. Please enter your Trait file name and select List of traits to include from drop-down list that you previously created. You can only use traits included in the list. 
Check the box titled Include Notes Trait if you would also like to record and upload general plot notes in the field. Click OK to submit. If your trait file was successfully created, a new window will indicate that the trait file was saved, then clicking on Close. After the trait file was saved, you will see your file listed in the Field Book Tools page. Clicking on Download link to download the trait file to your computer. After downloading the trait file to your computer, the file can be transferred to an Android Tablet. You need the Android Field Book App to open the file. The Android Field Book App can be downloaded at: http://www.wheatgenetics.org/bioinformatics/22-android-field-book 12.4 Transferring Files from Your Computer to Android Tablet 12.4.1 Files on your computer After downloading, Field Layout files and Trait files can be found in the Downloads folder of your computer. Field Layout files on your computer will have a prefix fieldbook_layout_ added to the beginning of the file name. For example: 2014-01-28_19:14:34_Trial Demo_location 6767.xls on the database website will be saved as field_book_layout_2014-01-28_19:14:34_Trial Demo_location 6767.xls on your computer. The files can be transferred to Android tablet by copying the files into the tablet's Internal Storage File. 12.4.2 Files on your Android tablet To transfer Field Layout file and Trait file to your Android tablet, connecting an Android tablet to your computer, then clicking on tablet icon on your computer. Clicking on the tablet icon will open a window showing an Internal Storage file. After you installed the Android Field Book App, all files for the app are stored in the fieldBook folder within the Internal storage folder. Within the fieldBook folder, there are five sub-folders: field_export field_import plot_data resources trait Field Layout files must be copied into the field_import folder. Trait files must be copied into the trait folder. 
You can either drag and drop, or copy the Field Layout file and the Trait file from your computer to the folders in your Android tablet. 12.5 Setting up Field Book App for data collection After you transferred the Field Layout file and Trait file from your computer to Android tablet, you still need to set up Field Book App on your tablet for data collection. To set up the Field Book App: To open the Field Book App in the Android Tablet, clicking on the Field Book App icon, which is a green rectangle. To import Field Layout files, clicking on the Fields section of the main menu of the Field Book App. Clicking on the Fields tab will open a new dialogue that will let you select the file that you want to import. Choosing a Field File will generate a new dialogue that will ask you to choose between an Excel or CSV format. Since the data from the database is in Excel format, choose the Excel option. After submitting the file format, a final dialogue box will appear. Please provide information about the file that you want to import. Please ensure that plot_name is set as the unique identifier. To finalize the process, clicking OK button. To import Trait Files, clicking on the Traits tab on the main menu of the Field Book App. Then, clicking on the three dots symbol found on the upper right corner of the Field Book screen. This will open a drop down menu with the choices Import and Export. Clicking on Import Clicking on import will open a new dialogue that displays a list of trait files that you can select to import to the Field Book App. The trait file is now imported into the Field Book App. The traits page will show all trait files and available traits. 12.6 Exporting Files from Field Book App Data that were collected on the Field Book App can be exported back to your tablet folder, which can then be transferred to your computer. 
To export files containing data from the Field Book App to your tablet, clicking on the Export link on the main menu page of the Field Book App. Clicking on the Export link will open a new dialogue window. To ensure that data are exported in a correct format for the database, checking the Database Format box, then clicking on OK button. The exported file can then be found in the field_export sub-folder within the fieldBook folder on your tablet. Once you connect your tablet to your computer, you can directly transfer the file to your computer. 12.7 Uploading Phenotype Files to an SGN database To upload phenotype files to the database, clicking on Field Book App in the Manage menu. On the Field Book Tools page, clicking on Upload link in the Uploaded Phenotype Files section. Clicking on the Upload link will open a new dialogue asking you to choose a file that you want to upload to the database website. Please ensure that plot_name is the first column of the file to be uploaded. To make sure that the file has the correct format for uploading, click on the Verify button. After the file format has been verified, click on the Store button. The list of uploaded phenotype files can be found on the Field Book Tools page The uploaded files will also be seen in the corresponding Trial Detail page. "],["managing-phenotypic-data.html", "Chapter 13 Managing Phenotypic Data 13.1 Uploading Fieldbook Phenotypes 13.2 Uploading Spreadsheet Phenotypes", " Chapter 13 Managing Phenotypic Data To facilitate uploading process for phenotypic data, Manage Phenotypic Data page provides two options for uploading: Field Book Phenotype file in database format and phenotype file in Excel (.xls or .xlsx) file format. To access Manage Phenotypic Data page, clicking on Phenotyping in the Manage menu. 13.1 Uploading Fieldbook Phenotypes 13.1.1 Export Field Book Database File The database upload of Field Book phenotype data relies on the Database format from the Field Book. 
Please make sure to export the Database format from the Field Book if you intend to upload the data using the Field Book Upload we describe below. If you prefer to use the Table format that the Field Book exports, you can modify this format to work with the Spreadsheet Upload we describe below. 13.1.2 Upload Field Book Database File To upload a Field Book Phenotype file in a database format, click the Upload Fieldbook link. The Upload Fieldbook link on this page and Upload link on the Field Book Tools page open the same dialogue. Please follow instructions for uploading phenotypic files in Chapter 12 (Using Field Book App). 13.2 Uploading Spreadsheet Phenotypes To upload a phenotype file in an Excel (.xls or .xlsx) file format, click the Upload Spreadsheet link. Please specify Data Level (Plots or Plants) and select the Excel file that you want to upload. 13.2.1 Generating Spreadsheet File You can find more file format information by clicking on Spreadsheet Format link. Clicking on Spreadsheet Format will open the following dialog. Clicking on Create Phenotyping Spreadsheet will bring up a dialog where you can indicate the trial(s) you are interested in and the trait list you are interested in. Clicking Submit will download the xlsx file onto your computer, where you can then fill in the phenotypes. 13.2.2 Uploading Spreadsheet File To ensure that the file has a correct format for uploading, click on the Verify button. This will check the contents of the file and also perform quality checks on the values in the file. These checks include checking the trait definition for categorical values, minimum and maximum values, and data type checking. It will also check if there are already values uploaded for the given observation units and traits. If there are, there is an option to overwrite the existing values with the new values in your file. If the file is valid, only then can you click Store to store the information in the database. 
"],["managing-barcodes.html", "Chapter 14 Managing Barcodes", " Chapter 14 Managing Barcodes SGN databases provide tools for generating barcodes for stock identification. To access Barcode Tools page, clicking on Barcodes in the Manage menu. Barcode Tools page provides four options for generating barcodes: Single barcode Multiple barcodes Plot phenotyping barcodes Trial barcodes To generate single barcode, clicking on Generate Barcode link on the Barcode Tools page. In the Generate Barcode section, specify the name of the barcode, size of the barcode, then clicking on Generate Barcode The database will generate a barcode for your stock. The barcode can be printed for your stock identification. It also appears on its corresponding stock page. If you have a list of stocks that you want to generate barcodes, you can use Download Stock Barcodes section. You have three options for entering stock names: Typing in stock names, or copy and paste from other file into the box (1) Choosing a list of stocks from your Lists (2), and transferring the list into the box (1) by clicking on paste button. Uploading a Tab-delimited Text File with stock names. Select an optional printing format from the available formats. You can select printer settings that you prefer in the Printer Settings section. After you enter stock names and specify printer settings, clicking on Download Barcodes button at the bottom of the page. If you have a list of plots that you want to generate phenotyping barcodes, you can use Download Plot Phenotyping Barcodes section. You have three options for entering plot names: Typing in plot names, or copy and paste from other file into the box (1) Choosing a list of plots from your Lists (2), and transferring the list into the box (1) by clicking on paste button. Uploading a Tab-delimited Text File with plot names. If you have a list of trials that you want to generate barcodes, you can use Download Trial Barcodes section. 
You have three options for entering trial names: Typing in trial names, or copy and paste from other file into the box (1) Choosing a list of trials from your Lists (2), and transferring the list into the box (1) by clicking on paste button. Uploading a Tab-delimited Text File with trial names. "],["using-the-label-designer.html", "Chapter 15 Using the Label Designer", " Chapter 15 Using the Label Designer Breedbase provides an interactive design tool for creating custom labels. To access the Label Designer, click on Label Designer in the Manage menu. The following sections explain your many options as you advance through each step of the design workflow. 15.0.1 First Select a Datasource The first step is to select a data source. Since the label designer can generate labels for different data types, you can optionally filter the source selection by the data type you're interested in. Then, select a field, genotyping, or crossing trial to populate your labels with the trial design information. Or select a list to populate your label with the list contents. For data sources with multiple levels of information you will also be asked to pick a level (plot, plant, etc.) before proceeding. To generate plot-level labels for more than one trial at once, select a list of trials as the source and plot as the level. 15.0.2 Set Page and Label Size Now choose whether to create a new design or load a saved design. If you choose new, you will be prompted to select a page size and label size. If you do not see your page or label size as an option, then select Custom and enter your desired dimensions in pixels, or 1/72nds of an inch. If you choose saved, you will be prompted to select a saved design then will be taken directly to the design step with the saved design elements preloaded. 15.0.3 Design Your Label Below is a draw area where you can begin adding elements to your label. 
First select a type, then field, size, and font, then click Add. You can add text to an existing field or create a completely custom field by clicking Create Custom Field. Once added, you can drag and drop elements, or delete them by clicking on the red box in their upper left corners. Barcodes can also be resized by dragging on the green box in their lower right corners. If you are creating labels for a trial it is highly recommended to include a barcode encoding your plot, plant, or tissue sample names. These are your unique identifiers that will need to be included with any phenotypic or genotypic measurements loaded into the database. When you are satisfied with your design, click next! 15.0.4 Adjust Formatting, Save, and Download Last step! Here you can tweak your formatting and page layout, save your design, or download your labels. The additional settings dialog will allow you to fine tune the print margins and margins between labels. The units are pixels or 1/72nds of an inch. It's not recommended to change these until you've already done a test print. You can also set the # of copies per label, filter by rep, or download just the first page for test purposes. To save your design just type a unique name and hit save. This will save your design to your list manager where you can set it to public to share it with others. Finally if you are ready just hit download to generate and download your labels! "],["managing-downloads.html", "Chapter 16 Managing Downloads", " Chapter 16 Managing Downloads You can download phenotype, trial meta-data, pedigree, GBS genotype and GBS genotype QC files from the database to your computer by using Lists. To download, click on Download in the Manage menu. For each category, you can select a list of accessions from your Lists to download their phenotypes, pedigree, GBS genotype, GBS genotype QC. 
In the case of downloading trial meta-data, you would provide a list of trials, while for downloading phenotype and GBS genotype QC, you can also use a list of trials or traits. "],["managing-odk-data-collection.html", "Chapter 17 Managing ODK Data Collection 17.1 ONA Crossing Information", " Chapter 17 Managing ODK Data Collection To access this page go to Manage and then ODK Data Collection. ODK is used for remotely collecting data on Android and iOS devices. We currently are working to support two ODK service providers, namely ONA and SMAP. We are using ONA to collect crossing information, including all lab activities following seed production. We are using SMAP for phenotypic data collection. 17.1 ONA Crossing Information 17.1.1 Managing ONA Crossing Information To begin collecting data using the ONA ODK form you must first have a crossing plan in the form of a Cross Wishlist. To do this from this page, click the Export Cross Wishlist to ONA button. Please refer to the Create Cross Wishlist help section for more information. It is possible to view the current available cross wishlists by clicking the Export Cross Wishlist to ONA button and then clicking Available Cross Wishlists. Once your cross wishlist is available, you can use your mobile ODK application to record crosses being done in real time. You can also record all laboratory activities following seed extraction up to greenhouse plantlet hardening. As you collect data using your mobile ODK application, your responses will be synchronized with our database. The Schedule Import for Selected Form section gives you options to perform the import daily or more frequently. It is also possible to initiate a data import from ONA at any time by clicking Import Crossing Data from Selected Form on ONA. 17.1.2 Reviewing Plant Status The mobile ODK application has options to collect information about the status of plants in the field, such as if they are flowering. Images for each plant can also be recorded. 
The database will report this information here in a summary table that looks like the following. Notice that images are also transferred to the database. 17.1.3 Graphical Summary For Performed Crosses There is a section to summarize activities done for each cross. In this table each row represents a single cross performed. All the activities that have been performed will be shown here, such as first pollination and embryo rescue. The scatter plot shown tracks seed numbers generated on the Y axis and date of activity on the X axis. 17.1.4 Summary Information For Performed Crosses There is a secondary section to summarize what has been done across the entire Cross Wishlist. This tree structure shows all activities performed for a cross and shows how these crosses relate to the Cross Wishlist. "],["managing-tissue-samples.html", "Chapter 18 Managing Tissue Samples 18.1 Tissue samples from field trials 18.2 Genotyping Plate Tissue Samples (96 or 384 well plates)", " Chapter 18 Managing Tissue Samples To access this page go to Manage and then Tissue Samples. 18.1 Tissue samples from field trials A field trial contains plots planted with a specific accession. Each plot can contain many plants, which in turn can contain many tissue samples. On the manage tissue sample page we can see the field trials that contain tissue samples already. We can choose to download the tissue sample layout as seen in the below picture. If the field trial you want to collect tissue samples from is not in the above table, you can click the button highlighted below. Once you have clicked this button, you will enter a workflow that begins with the following introduction. Once you click next, you will need to select your trial. Next, if your trial currently only has plot entries saved, you will be asked to enter how many plants are in each plot. Finally you will be asked how many tissue samples you want for each plant. 
You can specify a string to include in the tissue sample name, such as leaf or root. Afterwards you should see the following success message, indicating that the tissue samples are saved. 18.2 Genotyping Plate Tissue Samples (96 or 384 well plates) A genotyping plate represents a 96 or 384 well plate. You can use the Coordinate Android application to create your plate layout, or you can upload your own Excel plate layout, or you can use the database to generate a plate layout. Ideally, you will use tissue sample names originating from a field trial as the source for each well tissue sample, but you can also use plant names, plot names, or accession names. From the manage tissue samples page, you can see the genotyping plates saved in the database. You can also download the layouts as shown below. If you need to create a new genotyping plate, you can click the button shown below. This will guide you through a workflow for uploading or creating the new plate layout. Genotyping vendors require you to send a plate layout during submission. You can download the plate layout as shown above, or you can go to a genotyping plate detail page to download the Intertek formatted file. In the future you will be able to directly export your genotyping plate layout to vendors. "],["managing-observation-variables.html", "Chapter 19 Managing Observation Variables 19.1 Managing Observation Variables with Traits, Methods, and Scales", " Chapter 19 Managing Observation Variables 19.1 Managing Observation Variables with Traits, Methods, and Scales Observation variables are the identifiers used when collecting phenotypic data. An observation variable is composed of a trait, a method, and a scale. The trait describes the attribute being measured e.g. Plant Height. The method defines the protocol in which the trait was observed e.g. Using a one meter long measuring stick. The scale defines the units or dimensions for which the measurement was taken e.g. Meters. 
Generally, observation variables are defined in ontologies that are predefined. We often use ontologies from cropontology.org. In this case, you will not be able to define your own observation variables directly; instead, you will need to contact us and we will add the observation variable for you. For databases where the user has greater control, we have an interface to allow addition of observation variables, along with traits, methods, and scales. To begin, go to the Search->Traits page. If the database you are on allows you to directly add observation variables, you will see the following button at the bottom of the page. When you click the button, the following workflow will appear. You should be logged in or else it will not allow addition of the observation variable. The workflow begins with an introduction. On the next workflow step, you select the ontology that you want to insert the new observation variable into. You must also give a name and a definition for the new observation variable. On the next workflow step, you select the trait ontology to use. Once you select a trait ontology, a select containing all the terms in the selected ontology will appear. You can either select a trait or if it does not exist in the select, you can create a new one by giving a name and a definition for the new trait. On the next workflow step, you select the method ontology to use. Once you select a method ontology, a select containing all the terms in the selected ontology will appear. You can either select a method or if it does not exist in the select, you can create a new one by giving a name and a definition for the new method. On the next workflow step, you select the scale ontology to use. Once you select a scale ontology, a select containing all the terms in the selected ontology will appear. You can either select a scale or if it does not exist in the select, you can create a new one by giving a name and a definition for the new scale. 
You can also define a format, minimum, maximum, categories, and default value for the new scale. On the last page of the workflow, you confirm the submission. Afterwards, you can use the newly created observation variable ontology term in your phenotyping. "],["managing-image-data.html", "Chapter 20 Managing Image Data 20.1 Image-Phenotyping Dashboard 20.2 Image Input 20.3 Standard Process 20.4 Ground Control Points", " Chapter 20 Managing Image Data 20.1 Image-Phenotyping Dashboard Upload raw image-captures in a compressed file (.zip) for orthophotomosaic assembly or upload previously stitched orthophotomosaic raster (.PNG, .JPG) imagery. Dashboard shows all field trials and uploaded imaging events in collapsible sections. Follow standard processes to manually create templates for assignment of plot-polygon images to the field experiment design. All imagery is shown with the spectral category within collapsible sections. Figure shows NIR imagery. Apply Fourier transform filtering, thresholding, and vegetation index masking. Plot-polygon images for all image processes are shown. Extract and export phenotypic values from plot-polygon images for analyses and model training. 20.2 Image Input Clicking Upload Imagery will open the following dialog. Raw-captures can be uploaded in a compressed (.zip) file so that they can be assembled into an orthophotomosaic. If orthophotomosaic assembly is not required, raster images (.PNG, .JPG) can be uploaded. Example data is given for raw Micasense RedEdge 5-band multispectral captures and for stitched orthophotomosaics. To begin uploading images, a field trial must be selected. The field trial must already be saved in the database. For information about adding a field trial, please read the Field Trial documentation. The image data is added to an imaging (drone run) event. Here you can select a previously saved imaging event or you can create a new one by defining a name, description, and date. 
The uploaded data can be raw image-captures or complete raster images. Here you can select whether orthophotomosaic stitching is required. In the case that orthophotomosaic stitching is required, select yes. On the next step you will see the following: Upload a zipfile with the raw-captures. When uploading Micasense RedEdge raw-captures, provide images of the Micasense calibration panels in a zipfile as well. In the case that orthophotomosaic assembly is not required, simply upload the raster images. Select the number of image bands that will be uploaded e.g. for a five band multispectral camera, select 5. In the case that orthophotomosaic stitching is not required, select no. On the next step you will see the following: Upload an image at each band with a unique name, description, and spectral type. 20.3 Standard Process Once imagery is uploaded, it will appear on the dashboard under the field trial. Clicking the Run Standard Process button will begin extracting plot-polygon phenotypes from the imagery. Clicking the button will open the following dialog. Select a drone run band to use in this process. In the case of the Micasense 5 band multispectral camera there will be 5 bands shown here; select the NIR channel in this case because it has the highest contrast. In the case of standard color images, there will only be the RGB Color Image option here. Rotate the image so that the plots are oriented in a grid fashion. There can be a skew in the field layout, as seen in the following example. Perform a rough cropping of the image by clicking on the four corners of the field. Cropping is important to remove any extraneous parts of the image. This step shows a histogram of the cropped image. The standard process will magnitude threshold the top and low ends of the distribution. In this step, the template for the plot polygons in the experimental field design is associated with the image. First, define the number of rows and columns in the field experiment. 
Then click the four corners of the image, in respect to the top right, top left, bottom left, and bottom right positions. Next click on Draw Plot Polygon Template. Review the template and clear/repeat the process until the template matches well. It is possible to copy/paste templates in the case where there are large breaks in the field design. Next, scroll down to the assign Plot Polygons to Field Trial Entities section. Select the location of Plot Number 1 as either top left or top right and whether the field design is serpentine or zigzag. Click on Generate Assignments and review that the names of the plots appear correctly in the overlay on the image. Finally, click Finish and Save Polygons to Plots when you have confirmed the assignments. Next, the dialog shows you that the standard process will be repeated for all uploaded image bands. Next, choose which vegetation indices to apply. Next, choose the phenotypic values to extract. You must define the time point for which the phenotype is; if the field trial has a planting date, the time point will automatically be populated as image date minus the planting date. After completing the standard process, the job will continue in the background until it completes. You can check the status of the job from the dashboard. 20.4 Ground Control Points Ground control points can be saved after an imaging event has undergone the standard process on orthomosaics. Ground control points can then be used across imaging events on the same field experiment in order to automate the entire standard process. "],["managing-vcf-data.html", "Chapter 21 Managing VCF Data 21.1 Uploading VCF Data 21.2 Searching and Downloading VCF Data 21.3 Searching Protocols 21.4 Detail Pages and Deletion", " Chapter 21 Managing VCF Data 21.1 Uploading VCF Data Genotyping data in VCF can be loaded from the web-interface. Breedbase can store any genotypic variants from a VCF, allowing for polyploids, structural variants, etc. without problems. 
To begin go to Manage->Genotyping Plates and click the button seen below: Note that you do not need to have genotyping plates uploaded to upload VCF data; you may upload genotyping data to accessions or you can upload genotyping data for tissue samples in genotyping plates. The workflow begins with an intro: On the following step in the workflow, a genotyping project is defined or selected. A genotyping project is a high-level entity for grouping several genotyping events. It is defined with a name, description, breeding program, and genotyping facility (IGD, Intertek, etc.). The following step is to define or select a genotyping protocol. A genotyping protocol represents the set of markers being called against a specific reference genome. A genotyping protocol is defined with a name, description, reference genome name, species name, and a location of data generation. Note in the picture that you can select whether the samples in your file are accessions or tissue samples in the database; tissue samples are for when a genotyping plate is stored in the database. There is an option to parse the sample names for appended sequencing numbers from IGD, where the sample names are like accession:igdnumber. The final step is to select the VCF from your computer and upload it. The web interface can be used to upload arbitrarily large files; the maximum upload size is set in the NGINX configuration. 21.2 Searching and Downloading VCF Data The Search Wizard is the primary means of querying data in the database. Go to Search->Wizard to begin. Once genotyping protocols are stored, select Genotyping Protocols from the first dropdown menu. Then if you select one or more and select Accessions from the second dropdown menu, you will see the accessions for which genotypes were stored. As seen in the following picture, there is a section for filtering genotypes by chromosome, start position, and end position. Genotypes can be downloaded in VCF or DosageMatrix formats. 
Using the Default genotyping protocol which is configured in a system, you can query over field phenotypic evaluations before downloading genotypes and phenotypes. 21.3 Searching Protocols Genotyping protocols can be searched by going to Search->Genotyping Protocols. To download genotypes, accessions must be selected, though any combination of search criteria can be used to filter and select those accessions. If a genotyping protocol is not selected, then the default genotyping protocol set in the configuration will be used. Genotyping protocols can also be selected in the wizard. The genotyping download menu on the Search Wizard presents options for filtering by chromosome, start position, and end position. Genotypes can be downloaded in VCF or Dosage Matrix formats. The genomic relationship matrix (GRM) can be downloaded for the selected accessions in a tab-delimited matrix format or in a three-column format that is useful in Asreml. Genotypes can be computed from the parents in the pedigree if those parents are genotyped by clicking on the compute from parents checkbox. Additionally, the GRM can be computed using genotypes of parents in the pedigree if the compute from parents checkbox is selected. As is described elsewhere, the Search Wizard presents a way to filter phenotypic values by minimum and maximum values, and allows for download in CSV and Excel formats. 21.4 Detail Pages and Deletion The genotyping protocol detail page will show all information about the protocol such as the reference genome used, the header information lines in the uploaded VCF file, the markers involved, and the samples genotyped. The markers section will show all markers used and their annotations, such as position, chromosome, alternate allele, reference allele, marker format, etc. The samples section will show all samples genotyped. 
Notice the Download links in the table which can be used to easily get the VCF file results for each genotyped sample with all markers in the genotyping protocol. For getting multiple samples at once, use the Search Wizard as discussed above. The genotyping protocol and all associated genotyping data can be deleted from the genotyping protocol page. "],["managing-spectral-data.html", "Chapter 22 Managing Spectral Data 22.1 Upload Spectral Data 22.2 Evaluate and Remove Outliers 22.3 Plot Spectra 22.4 Aggregate Spectra 22.5 References", " Chapter 22 Managing Spectral Data Breedbase has implemented a flexible spectral data storage protocol that handles spectral data irrespective of the source spectrometer. Spectral data storage and analysis in Breedbase makes use of the R package waves for outlier identification, plotting, sample aggregation, and prediction model training. 22.1 Upload Spectral Data Spectral data can be added as a CSV file that includes metadata in the leftmost columns followed by one column per spectral measurement to the right. Rows represent a single scan or sample, each with a unique ID that must match to a Breedbase observationUnitName. Future data transfer using BrAPI will allow for interoperability with data collection software. To upload a spectral dataset, navigate to the Manage NIRS Data page by selecting NIRS in the Manage menu and click the blue Upload NIRS button. This will open an upload workflow. A link to the required file format and an example .csv file can be found by clicking in the light blue info box in this workflow. Another example of the file format is shown below. id: Optional identifier for each NIRS read. The id must be an integer. sampling_id: Optional identifier for each sample. Strings are allowed. sampling_date: Optional field. The format allowed is: YYYY-MM-DD. observationunit_name: Required field that matches existing data in the database. 
It can be the plot name, subplots, plant name, or tissue sample, depending how your trial is designed. device_id: Optional field to identify your device. Strings are allowed. device_type: Required field. It is possible to upload data for a single device type. They can be: SCiO, QST, Foss6500, BunchiN500, or LinkSquare. comments: Optional field for general comments. All other columns are required wavelengths. You can add as many columns as you want to upload; there is no limit. 22.2 Evaluate and Remove Outliers Spectral calibration models can be heavily affected by the presence of outliers, whether they come from spectrometer spectral artifacts or user errors. Mahalanobis distance (Mahalanobis, 1936) is a measure of the distance between a single observation and a larger distribution and is commonly used in the identification of outliers in a multivariate space (De Maesschalck et al., 2000). The FilterSpectra() function in the R package waves calculates the Mahalanobis distance of each observation in a given spectral matrix using the stats::mahalanobis() function. Observations are identified as outliers if the squared distance is greater than the 95th percentile of a \\(\\chi^2\\)-distribution with p degrees of freedom, where p is the number of columns (wavelengths) in the spectral matrix (Johnson and Wichern, 2007). In Breedbase, this procedure is applied on a per-dataset basis on upload and outliers are given binary tags Outlier. 22.3 Plot Spectra After outlier identification, a plot is generated using the PlotSpectra() function in waves. This function uses the filtered spectra and ggplot2::ggplot() to create a line plot with outliers highlighted by color. A list of rows identified as outliers is shown beneath the plot. Plots are saved as .png files and linked to the original input datasets. Plot image files can be downloaded via the Download Plot button in the upload workflow. 
22.4 Aggregate Spectra To obtain a stable and reliable spectral profile, most spectrometer manufacturers recommend that multiple spectral scans are captured for each sample. While some spectrometers aggregate these scans internally, many do not, requiring the user to do so before analysis can take place. Breedbase handles these cases upon data upload following filtering steps by calling the AggregateSpectra() function from waves, saving the aggregated scans for future access through the search wizard feature. Scans are aggregated by sample mean (e.g. plot-level basis) according to the provided observationUnitName field. After aggregation, the user exits the upload workflow and the raw data file is saved in the upload archive. 22.5 References De Maesschalck, R., Jouan-Rimbaud, D., and Massart, D. L. (2000). The Mahalanobis distance. Chemom. Intell. Lab. Syst. 50(1): 1-18. Johnson, R. A. & Wichern, D. W. (2007). Applied Multivariate Statistical Analysis (6th Edition). p 773. Mahalanobis, P. C. (1936). On the generalized distance in statistics. National Institute of Science of India. Analysis tool documentation "],["managing-sequence-metadata.html", "Chapter 23 Managing Sequence Metadata 23.1 What is Sequence Metadata? 23.2 Loading Sequence Metadata 23.3 Searching Sequence Metadata 23.4 Marker Integration 23.5 Sequence Metadata API", " Chapter 23 Managing Sequence Metadata 23.1 What is Sequence Metadata? Sequence Metadata is a feature that allows for the efficient storage and retrieval of sequence annotations for a specific region along a reference genome. The annotation data can contain a primary score value and any number of secondary key/value attribute data. For example, Sequence Metadata can store MNase open chromatin scores for every 10 basepairs along the reference genome as well as genome-wide association study (GWAS) statistics, including the trait information associated with the result. 
This data can then be filtered by position and/or scores/attribute values and even cross-referenced with markers stored in the database. 23.2 Loading Sequence Metadata Sequence Metadata can be loaded into the database using a gff3-formatted file. The following columns are used to load the data: #1 / seqid: The name of the database feature (i.e. chromosome) the metadata is associated with (The feature name must already exist as a feature in the database) #4 / start: The metadata's start position #5 / end: The metadata's end position #6 / score: (optional) The primary score attribute of the metadata #9 / attributes: (optional) Secondary key/value attributes to be saved with the score. These should be formatted using the gff3 standard (key1=value1;key2=value2). The attribute key cannot be either score, start, or end. To upload the gff3 file: Go to the Manage > Sequence Metadata page Click the Upload Sequence Metadata button On Step 2 of the Wizard, select the Type of data to be uploaded This groups similar datasets together in the same Data Type category On Step 3 of the Wizard, select an existing Protocol or create a new one The Protocol is used to describe how the data was generated and define the score value and any secondary attributes. Adding the attributes (and their descriptions) to the Protocol will allow the Sequence Metadata queries to filter the data based on the value of one or more of these attributes. Attributes not defined in the Protocol will still be stored and displayed on retrieval, but will not be able to be used in a search filter. Finally, select and upload your gff3 file to the database. The database will verify the format of the file before its contents are stored. 23.3 Searching Sequence Metadata To retrieve stored Sequence Metadata, go to the Search > Sequence Metadata page. 
23.3.1 Basic Search The basic Sequence Metadata search options include selecting the reference genome and species, the chromosome, and (optionally) the start and/or end position(s) along the reference genome. In addition, one or more specific protocols can be selected to limit the results. The Sequence Metadata search results are returned as a table, including the chromosome and start/stop positions of the annotation, along with the primary score value and any additional key/value attributes. The markers column will include a list of marker names of any stored markers that are found within the start/stop positions of the Sequence Metadata. The data can be downloaded as a table in an Excel or CSV file or a machine-readable (code-friendly) JSON file. If the Sequence Metadata JBrowse configuration is set, the filtered results can be displayed as a dynamic JBrowse track. 23.3.2 Advanced Search Any number of advanced search filters can be applied to the query. The advanced filters can limit the search results by the value of the primary score and/or any of the secondary attribute values. 23.4 Marker Integration A table of Sequence Metadata annotations is embedded on the Marker/Variant detail page. The table will include any annotations that span the position of the marker (for data of the same reference genome and species). 23.5 Sequence Metadata API A publicly accessible RESTful API (Application Programming Interface) is available to query the database for Sequence Metadata directly from your programming environment (R, python, etc) to be used in analysis. The data is returned in a JSON format. Documentation for the API can be found on the Manage > Sequence Metadata page "],["managing-outliers-in-dataset.html", "Chapter 24 Managing Outliers in Dataset 24.1 What is Outliers Functionality in Dataset ? 
24.2 Accessing Trait Visualization 24.3 Interpreting Visual Elements 24.4 Choosing Cut-Off Values 24.5 Setting Deviation Multiplier 24.6 Utilizing Graph Controls", " Chapter 24 Managing Outliers in Dataset 24.1 What is Outliers Functionality in Dataset ? As described in The Search Wizard, we can create a dataset. The dataset incorporates a feature to identify outlier points, which we may choose to exclude from a specific dataset. It's important to note that these exclusions only apply at the dataset level, and no data is permanently removed from the database. Additionally, outlier categorization can be modified at any time, and these changes are visible to all other functionalities within the system. Each dataset stores a wholly unique set of outlier points, completely independent of any other dataset in the database. Outliers are specifically designated for traits within datasets, exclusively encompassing phenotype data. If a particular dataset lacks traits as a part of wizard selection, this functionality is not available. Each trait has its own set of defined outliers. 24.2 Accessing Trait Visualization Once you've selected a specific trait, the web application provides access to a visualization of the data points associated with that trait. 24.3 Interpreting Visual Elements Once you've selected a specific trait, the web application provides access to a visualization of the data points associated with that trait. Green Points: As per the legend, represent values for the selected trait that fall below the cut-off point set by the slider. (non-outliers) Black Outlined Points: These data points are outlined with black borders, indicating that they are currently designated as outliers in the database. Red Points: The red data points denote the cut-off points established by the slider for the allowable deviation value. 
24.4 Choosing Cut-Off Values You have two fundamental options for setting cut-off points: Median with MAD: This option involves using the median (middle value) along with the Mean Absolute Deviation (MAD) as a reference point for determining cut-off values. Mean with Standard Deviation: Alternatively, you can choose to use the mean (average) in conjunction with the Standard Deviation to set cut-off points. 24.5 Setting Deviation Multiplier The slider allows you to specify the deviation multiplier from a central point, which influences the cut-off values. 24.6 Utilizing Graph Controls Beneath the graph, you'll find four buttons, each serving a distinct function: Add selection to outliers: This button enables you to save the current cut-off points to the database for future reference. Reset outliers for current trait: You can use this option to reset outliers for the selected trait. Reset all outliers: This button allows you to reset outliers for the entire dataset. Download Phenotype Table without outliers: You can download the phenotype data table in a comma-separated value format file, using this feature, with outliers excluded for selected dataset. These tools and functions are designed to provide you with control and insights when working with data visualization and outliers. 
"],["data-analysis-tools.html", "Chapter 25 Data Analysis Tools 25.1 Selection Index 25.2 Genomic Selection 25.3 Genome Browsing 25.4 Principal Component Analysis (PCA) 25.5 ANOVA 25.6 Clustering (K-Means, Hierarchical) 25.7 Genetic Gain 25.8 Kinship and Inbreeding Coefficients 25.9 Creating Crossing Groups 25.10 Search Wizard Genomic Relationship Matrix (GRM) Download 25.11 Search Wizard Genome Wide Association Study (GWAS) 25.12 Spectral Analysis 25.13 General Mixed Model Tool 25.14 Genomic Prediction of Cross Performance (GCPC)", " Chapter 25 Data Analysis Tools SGN databases provide several tools for phenotype data analysis, marker-assisted selection, sequence and expression analyses, as well as an ontology browser. These tools can be found in the Analyze menu. 25.1 Selection Index To determine rankings of accessions based on more than one desirable trait, SGN databases provide a Selection Index tool that allows you to specify a weighting on each trait. To access the tool, click on Selection Index in the Analyze menu. On the Selection Index page, select a trial that you want to analyze. After you have selected a trial, you can find traits that were assayed in that trial in the Trait box. Selecting a trait that you want to include in the analysis will open a new dialogue showing the selected trait and a box in which you can assign a Weight to that trait. After you are done, you can continue by selecting another trait by clicking on the Add another trait link. After you have selected another trait, this page will automatically update information for you by showing all of the traits that you selected for the analysis. You also have options to choose a reference accession, to include accessions with missing phenotypes, and to scale values to a reference accession. After you complete your settings, click on Calculate Rankings. The Selection Index tool will generate rankings of accessions based on the information that you specified. 
You can copy the results to your system clipboard, convert the table data to CSV format, or print the data. Clicking on Raw Average will display average values of the phenotypes of those ranked accessions. Selection Index tool also allows you to save top ranked accessions directly to Lists. You can retrieve top ranked accessions by selecting a number or a percent. 25.2 Genomic Selection The prediction of breeding values for a trait is a one step or two steps process, depending on what stage in your breeding cycle you are. The first step is to build a prediction model for a trait using a training population of clones with phenotype and genotype data. If you have yet to select parents for crossing for your first cycle of selection you can use the breeding values of the training population. If you are at later stages of your selection program, you need to do the second step which is applying the prediction model on your selection population. All clones in your training and selection populations must exist in the database. To use the genomic selection tool, on cassavabase.org, select Genomic Selection from the analyze pull-down menu. There are three ways to build a model for a trait. 25.2.1 Building a Model - Method 1: One way to build a model is, using a trait name, to search for trials in which the trait was phenotyped and use a trial or a combination of trials to build a model for the trait. For example, if you search for mosaic disease severity, you will get a list of trials you can use as training populations. You will get a list of trials (as shown below) in which the trait of your interested was phenotyped. From the list, you can use a single trial as a training population or combine several trails to form a training population for the prediction model of the trait. 
Let's say you want to create a training population using individuals from trials cassava ibadan 2001/02 and cassava ibadan 02/03 and build a model for cassava mosaic disease severity using all clones from the training population. Select the trials to combine (the same coloured), click done selecting, click the combine trials and build model button, and you will get a model and its output for the trait. On the model detail page, you can view the description of input data used in the model, output from the model and a search interface for selection populations to which you can apply the model to predict their breeding values. The description of the input data for the model includes the number of phenotyped clones, and the number of markers, scatter and frequency distribution plots for the phenotype data, relationship between the phenotype data and GEBVs, population structure. The model output includes model parameters, heritability of the trait, prediction accuracy, GEBVs of the individuals from the training population and marker effects. Expand each section to see detailed information. If you expand the Trait phenotype data section, you will find plots to explore the phenotype data used in the model. You can assess the phenotype data using scatter and histogram plots and the descriptive statistics. A regression line between observed phenotypes and GEBVs shows the relationship between the two. You can also explore if there is any sub-clustering in the training population using PCA. To check the model accuracy, a 10-fold cross-validation test, expand the model accuracy section. Marker effects are also available for download. To do so, expand the Marker Effects section and click the Download all marker effects link and you will get a tab delimited output to save on your computer. The breeding values of the individuals used in the training population are displayed graphically. Mousing over each data point displays the clone and its breeding value. 
To examine them more closely, you can zoom in on the plot by selecting an area on the plot. You can download them also by following the Download all GEBVs link. Estimating breeding values in a selection population If you already have a selection population (in the database), from the same model page, you can apply the model to the selection population and estimate breeding values for all the clones in the population. You can search for a selection population of clones in the database using the search interface or you can make a custom list of clones using the list interface. If you click the search for all relevant selection populations, you will see all relevant selection populations for that model. However, this option takes a long time because of the large set of populations in the database and the filtering. Therefore, the fastest way is to search for each of your selection populations by name. If you are logged in to the website you will also see a list of your custom set of genotyped clones. To apply the model to a selection population, simply click your population name or Predict Now and you will get the predicted breeding values. When you see a name (or acronym) of the trait, follow the link and you will see an interactive plot of the breeding values and a link to download the breeding values of your selection population. 25.2.2 Building a Model - Method 2 Another way to build a model is by selecting a trial, instead of selecting and searching for a specific trait. This approach is useful when you know a particular trial that is relevant to the environment you are targeting to breed material for. This method allows you to build models and predict genomic estimated breeding values (GEBVs) for several traits within a single trial at once. You can also calculate selection index for your clones when GEBVs are estimated for multiple traits. To do this select the Genomic Selection link found under the analyze menu. 
This will take you to the same home page as used with Method 1. However, instead of entering information to search for in Search for a trait, click on Use a trait as a trial population. This will expand a new menu that will show all available trials. To begin creating the model, select the existing trial that you would like to use. In this example I will be using the trial and trait data from Cassava Ibadan 2002/03 trial. Clicking on a trial will take you to a page where you can find information such as number of markers and number of phenotypes clones. In addition to the number of phenotype clones and number of markers, the main page for the trial selected also has information and graphs on phenotypic correlation for all of the traits. By moving your cursor over the graph you can read the different values for correlation between two traits. A key with all of the trait names of the acronyms used can be found in the tab below the graph. Below the Training population summary there is a tab for Traits. Clicking on this tab will show all available traits for the specific trial. You can create a model by choosing one or multiple traits in the trial and clicking Build Model. In this example, the traits for cassava bacterial blight severity and cassava mosaic disease severity have been selected. Clicking on Build Model will take you to a new page with the models outputs for the traits. Under the Genomic Selection Model Output tab you can view the model output and the model accuracy. Clicking on any of the traits will take you to a page with information about the model output on that individual trait within the trial. There you can view all of the trait information that was seen in more detail in Method 1. You can apply the models to simultaneously predict GEBVs for respective traits in a selection population by clicking on Predict Now or the name of the selection population. 
You can also apply the models to any set of genotyped clones that you can create using the lists feature. For more information on lists, click here. Follow the link to the trait name to view and download the predicted GEBVs for the trait in a selection population. To compare clones based on their performance on multiple traits, you can calculate selection indices using the form below. Choose from the pulldown menu the population with predicted GEBVs for the traits and assign relative weights for each trait. The relative weight of each trait must be between 0 - 1. 0 being of least weight and importance, not wanting to consider that particular trait in selecting a genotype and 1 being a trait that you give highest importance. In this example we will be using the Cassava Ibadan 2002/03 population and assigning values to each of the traits. Remember that there is a list of acronyms and trait names at the bottom of the page for reference. After entering whatever values you would like for each trait click on the Calculate button to generate results. This will create a list of the top 10 genotypes that most closely match the criteria that you entered. The list will be displayed right below the selection index tab. This information can also be downloaded onto your computer by clicking on the Download selection indices link underneath the listed genotypes and selection indices. 25.2.3 Building a Model - Method 3 In addition to creating a model by searching for pre-existing traits or by preexisting trial name, models can also be created by using your own list of clones. This creates a model by using or creating a training population. The page to use the third Method for creating a population model is the same as for the other two models. Select Genomic Selection from under the analyze menu of the main toolbar. This will take you to the Genomic Selection homepage and show you all three available methods to create a model. 
To see and use Method 3 scroll down and click on the tab labeled Create a Training Population. This will open a set of tools that will allow you to use pre-existing lists or to create a new list. Once the Create a Training Population tab is opened you have the option to use a pre-existing list or create a new one. To learn how to create a list, click here. The Make a new list of plots link will take you directly to the Search Wizard that is usually used to create lists. Please note: the only lists that can be used in Method 3 to create a model are lists of plots and trials. If the pre-existing list is not of plots or trials (for example, traits, or locations) it will not show up and cannot be used as a training population. When you use a list of trials, the trials data will be combined to create a training data set. To use your custom list of plots or trials as a training population, select the list and click Go. This will take you to a detail page for the training population. From here on you can build models and predict breeding values as described in Method 2. 25.3 Genome Browsing There are two ways to evaluate genotype information within the browser, from an accession detail page or a trial detail page. 25.3.1 Browsing Genotype data by Accession If you are interested in browsing genotype information for a single accession, for example BAHKYEHEMAA, navigate to the accession detail page. Near the bottom of the detail page is a collapsible section called Accession Jbrowse. This section will contain a link to the accession jbrowse page if the necessary genotype data is available. Clicking the link should take you to a page that looks like this, at which point you can browse the genotype data in the form of a vcf track aligned to the latest build of the genome. 25.3.2 Browsing Genotype data by Trial If you are interested in browsing genotype information for the accessions within a given trial, navigate to the trial detail page. 
Halfway down the page is a collapsible section called Trial Jbrowse. This section will contain a link to the trial jbrowse page if the necessary genotype data for at least two accessions planted in the trial is available. Clicking the link should take you to a page that looks like this, a which point you can browse the genotype data in the form of vcf tracks aligned to the latest build of the genome. 25.4 Principal Component Analysis (PCA) Principal component analysis helps estimate and visualize if there is sub-grouping of individuals within a dataset based on a number of variables. Currently, you can use marker data to run PCA on datasets. You can run PCA from multiple places on the website. To do PCA on individuals from a trial, go to the trial detail page and find the PCA tool under the Analysis tools section. individuals from a training population you used in a GS modeling, do your modeling and find the PCA tool in the model output page. individuals in a training population and selection population you applied the training model, do your modeling, apply the model on the selection population and find the PCA tool on the selection population prediction output page. individuals in a list of accessions you created, for example using the search wizard, go to the Analyze menu and select the Population Structure, select your list of individuals and run PCA. individuals from multiple trials, create a list of the trials using the search wizard, go to the Analyze menu and select the Population Structure, select your list of trials and run PCA. With all the options, you will get a interactive plot of the two PCs (shown below) that explain the largest variance. Point the cursor at any data point and you will see the individual name with its corresponding PCs scores. By clicking the Download all PCs, you can also download the 10 PCs scores in the text format. 25.5 ANOVA Currently, ANOVA is implemented for a single trial (single year and single location). 
You can do ANOVA for RCBD, CRD, Alpha and Augmented trial designs. ANOVA is done using a linear mixed effects model, where genotype is a fixed effect and the replications and blocks are random effects. Fixed effect significance level is computed using lmer from the lmerTest R package. You can do ANOVA from two places: trial detail and training population detail. In both cases, if the phenotype data was from the supported trial designs, Go to the ANOVA section down in the trial or training population page Select the trait you want to perform ANOVA on Click the Run ANOVA and wait for the result 25.6 Clustering (K-Means, Hierarchical) The K-Means method allows you to partition a dataset into groups (K number). The hierarchical clustering, agglomerative, allows you to explore underlying similarity and visualize in a tree structure (dendrogram) the different levels of similarities (clusters) among samples. You can do clustering based on marker data, phenotype data and GEBVs. When you use phenotype data, first clone averages for each trait are calculated. Both methods use Euclidean distance as a measure of similarity. For the hierarchical clustering, the complete-linkage (farthest neighbour) method is used to link up clusters. There are three pathways to using this tool. When you have data in the form of a list or dataset from the search wizard: go to the Analyze menu and select the clustering option make sure you are logged in Select the relevant genotyping protocol, if you are clustering using genotype data select your list or dataset, click Go select clustering type select the data type to use If you are running K-Means clustering, provide the number of partitions (K). If left blank it will partition the data set into optimal numbers for the dataset. click the Run Cluster and wait for the analysis to finish or queue the request and wait for an email with the analysis result. You can download the outputs following the download links. 
From the trial detail page: Go to the Analysis Tools section Follow steps D to G in (1) In the solGS pipeline: Once you are in a model output page, you will see a section where you can do clustering in the same way as above (option 2). K-Means clustering: Hierarchical clustering: 25.7 Genetic Gain You can check for genetic gain by comparing the GEBVs of a training and a selection population. You can do this in the solGS pipeline once you build a model and apply the model to predict the GEBVs of a selection population. Once at that stage, you will see a section Check Genetic Gain. Select a selection population to compare with the training population and click the Check Genetic Gain button. The genetic gain will be visualized in boxplots. You can download the boxplot(s) as well as the GEBVs data used for the plot(s). 25.8 Kinship and Inbreeding Coefficients This tool allows you to estimate genetic relatedness between a pair of individuals (kinship), homozygosity across loci in an individual (inbreeding coefficient), and genetic similarity of an individual relative to the rest of the population (average kinship). There are three pathways to using this tool. (1) When you have a list or dataset of clones, created from the search wizard: go to the Analyze menu and select the kinship and inbreeding make sure you are logged in Select the genotypic protocol for the marker data select your list or dataset of clones, click Go click the Run Kinship and wait for the analysis to finish, depending on the data size this may take minutes. You can choose to submit the analysis and wait for an email notice to view the results or wait for it to complete. You can download the output following the download links. (2) From the trial detail page: Go to the Analysis Tools section Follow steps C to G in (1) (3) In the solGS pipeline: Once you are in a model output page, scroll down to the Kinship and Inbreeding section and run kinship. 
25.9 Creating Crossing Groups If you calculate selection index based on GEBVs of multiple traits, and you want to select a certain proportion of the indexed individuals (e.g.top 10%, or bottom 10%) and then you want to partition the selected individuals into a number of groups based on their genotypes, you can use the k-means clustering method. The procedure is: predict GEBVs for 2 or more traits In the models output page, calculate selection indices. Note the name of the selection index data. Go to the clustering section, select the selection index data, select K-means, select Genotype, in the K-numbers textbox, fill in the number of groups you want to create, in the selection proportion textbox, fill in the proportion of the indexed individuals you want to select, e.g.for the top 15 percent, 15. if you wish to select bottom performing, prefix the number with minus sign (e.g.-15) then run cluster and wait for the result. 25.10 Search Wizard Genomic Relationship Matrix (GRM) Download The genomic relationship matrix (GRM) is useful for understanding underlying structure in your population. Breedbase can compute the GRM using rrBLUP. First, select accessions in the search wizard and optionally select a genotyping protocol. If no genotyping protocol is selected, the default genotyping protocol in your system is used (as defined in sgn_local.conf). Specify the minor allele frequency, missing marker data, and missing individuals data filters to apply. The GRM can be returned in a matrix format (.tsv) which shows all pairwise relationships between the selected accessions and is useful for visualization; alternatively, the GRM can be returned in a three-column format (.tsv) which is useful for programs like ASReml outside of Breedbase. The GRM can also be returned as a simple correlation heatmap image (.pdf). 
The GRM can be computed from parents of the selected accessions granted the parents were genotyped, by clicking the checkbox compute from parents; this is useful for programs where parental lines are genotyped and then hybrids are created and evaluated in the field. 25.11 Search Wizard Genome Wide Association Study (GWAS) Performing a genome wide association study (GWAS) can determine genotypic markers which are significantly correlated to phenotypic traits. Breedbase can compute GWAS using rrBLUP. First, select accessions and trait(s) in the search wizard, and optionally select a genotyping protocol. If no genotyping protocol is selected, the default genotyping protocol in your system is used (as defined in sgn_local.conf). Several traits can be selected in the search wizard; if the traits are not to be treated as repeated measurements then select no in the select box and this will tell Breedbase to return GWAS results independently for the selected traits. If the selected traits are indeed all repeated measurements then select yes in the select box and Breedbase will return as single GWAS analysis across all the phenotypic records. Specify the minor allele frequency, missing marker data, and missing individuals data filters to apply. GWAS results can be returned in a tabular format (.tsv) where the -log10(p-values) for the selected traits are returned; alternatively, the GWAS results can be returned as Manhattan and QQ plots for the selected traits. The GWAS can be computed from parents of the selected accessions granted the parents were genotyped, by clicking the checkbox compute from parents; this is useful for programs where parental lines are genotyped and then hybrids are created and evaluated in the field. The GWAS will filter the data by the input MAF and missing data filters provided. After filtering the data is imputed using an EM method in rrBLUP. The Kinship matrix (GRM) is computed from the imputed genotypic data and used in the GWAS model. 
The GWAS uses fixed effects for different field trials and replicates in the phenotypic data. 25.12 Spectral Analysis Visible and near-infrared spectroscopy (vis-NIRS) can be related to reference phenotypes through statistical models to produce accurate phenotypic predictions for unobserved samples, increasing phenotyping throughput. This technique is commonly used for predicting traits such as total starch, protein, carotenoid, and water content in many plant breeding programs. Breedbase implements the R package waves to offer training, evaluation, storage, and use of vis-NIRS prediction models for a wide range of spectrometers and phenotypes. 25.12.1 Dataset selection In order to initiate an analysis, the user must select one or more datasets using 2.1. A dataset in Breedbase can contain observationUnit-level (plot-, plant-, or sample-level) trial metadata and phenotypic data from one or more trials. After navigating to the NIRS webpage under the Manage tab in Breedbase, the user can initiate an analysis and select one of these datasets as input for model training. An optional test dataset can be selected in the second step of the workflow. 25.12.2 Cross-validation Five cross-validation schemes that represent scenarios common in plant breeding are available for this analysis. These include CV1, CV2, CV0, and CV00 as outlined below and described in depth by Jarquín et al. (2017) as well as random and stratified random sampling with a 70% training and 30% validation split. For those schemes from Jarquín et al. (2017), specific input datasets must be chosen based on genotype and environment relatedness. 
Cross-validation choices: * Random sampling (70% training / 30% validation) * Stratified random sampling, stratified based on phenotype (70% training / 30% validation) * CV1, untested lines in tested environments * CV2, tested lines in tested environments * CV0, tested lines in untested environments * CV00, untested lines in untested environments 25.12.3 Preprocessing Preprocessing, also known as pretreatment, is often used to increase the signal to noise ratio in vis-NIR datasets. The waves function DoPreprocessing() applies functions from the stats and prospectr packages for common spectral preprocessing methods with the following options: * Raw data (default) * First derivative * Second derivative * Gap segment derivative * Standard normal variate (SNV; Barnes et al., 1989) * Savitzky-Golay polynomial smoothing (Savitzky and Golay, 1964) For more information on preprocessing methods and implementation, see the waves manual, available through CRAN: waves.pdf 25.12.4 Algorithms Several algorithms are available for calibration model development in Breedbase via the waves package. The TrainSpectralModel() function in waves performs hyperparameter tuning as applicable using these algorithms in combination with cross validation and train functions from the package caret. Currently, only regression algorithms are available, but classification algorithms such as PLS-DA and SVM clasification are under development. * Partial least squares regression (PLSR; Wold et al., 1982; Wold et al., 1984) is a popular method for spectral calibrations, as it can handle datasets with high levels of collinearity, reducing the dimensionality of these data into orthogonal latent variables (components) that are then related to the response variable through a linear model (reviewed in Wold et al., 2001). To avoid overfitting, the number of these components included in the final model must be tuned for each use case. The PLSR algorithm from the pls package is implemented by waves. 
* Random Forest regression (RF; Ho, 1995) is a machine learning algorithm based on a series of decision trees. The number of trees and decisions at each junction are hyperparameters that must be tuned for each model. Another feature of this algorithm is the ability to extract variable importance measures from a fitted model (Breiman, 2001). In Breedbase, this option is made available through implementation of the RF algorithm from the package randomForest in the waves function TrainSpectralModel(). This function outputs both model performance statistics and a downloadable table of importance values for each wavelength. It is worth noting that this algorithm is computationally intensive, so the user should not be alarmed if results do not come right away. Breedbase will continue to work in the background and will display results when the analysis is finished. * Support vector machine regression (SVM; Vapnik, 2000) is another useful algorithm for working with high-dimension datasets consisting of non-linear data, with applications in both classification and regression. The package waves implements SVM with both linear and radial basis function kernels using the kernlab package. 25.12.5 Output: common model summary statistics After training, model performance statistics are both displayed on a results webpage and made available for download in .csv format. These statistics are calculated by the TrainSpectralModel() function in waves using the caret and spectacles packages. 
Reported statistics include: * Tuned parameters depending on the model algorithm * Best.n.comp, the best number of components to be included in a PLSR model * Best.ntree, the best number of trees in an RF model * Best.mtry, the best number of variables to include at every decision point in an RF model * RMSECV, the root mean squared error of cross-validation * R2cv, the coefficient of multiple determination of cross-validation for PLSR models * RMSEP, the root mean squared error of prediction * R2p, the squared Pearson's correlation between predicted and observed test set values * RPD, the ratio of standard deviation of observed test set values to RMSEP * RPIQ, the ratio of performance to interquartile distance * CCC, the concordance correlation coefficient * Bias, the average difference between the predicted and observed values * SEP, the standard error of prediction * R2sp, the squared Spearman's rank correlation between predicted and observed test set values 25.12.6 Export model for later use Once a model has been trained, it can be stored for later use. This action calls the SaveModel() function from waves. Metadata regarding the training dataset and other parameters specified by the user upon training initialization are stored alongside the model object itself in the database. 25.12.7 Predict phenotypes from an exported model (routine use) For phenotype predictions, users select a dataset and can then choose from models in the database that were trained using the same spectrometer type as the spectral data in the chosen dataset. Predicted phenotypes are stored as such in the database and are tagged with an ontology term specifying that they are predicted and not directly measured. Metadata regarding the model used for prediction is stored alongside the predicted value in the database. Predicted phenotypes can then be used as normal in other Breedbase analysis tools such as the Selection Index and GWAS. 
25.12.8 FAQ The Breedbase Spectral Analysis Tool does not allow for prediction models involving data from multiple spectrometer types at once. References * Barnes, R.J., M.S. Dhanoa, and S.J. Lister. 1989. Standard normal variate transformation and de-trending of near-infrared diffuse reflectance spectra. Appl. Spectrosc. 43(5): 772-777. doi: 10.1366/0003702894202201. * Breiman, L. 2001. Random forests. Mach. Learn. 45: 5-32. doi: 10.1201/9780429469275-8. * Ho, T.K. 1995. Random decision forests. Proc. Int. Conf. Doc. Anal. Recognition, ICDAR 1: 278-282. doi: 10.1109/ICDAR.1995.598994. * Jarqun, D., C. Lemes da Silva, R.C. Gaynor, J. Poland, A. Fritz, et al.2017. Increasing Genomic-Enabled Prediction Accuracy by Modeling Genotype x Environment Interactions in Kansas Wheat. Plant Genome 10(2): plantgenome2016.12.0130. doi: 10.3835/plantgenome2016.12.0130. * Johnson, R.A., and D.W. Wichern. 2007. Applied Multivariate Statistical Analysis (6th Edition). De Maesschalck, R., D. Jouan-Rimbaud, and D.L. Massart. 2000. The Mahalanobis distance. Chemom. Intell. Lab. Syst. 50(1): 1-18. doi: 10.1016/S0169-7439(99)00047-7. * Mahalanobis, P.C. 1936. On the generalized distance in statistics. Natl. Inst. Sci. India. * Savitzky, A., and M.J.E. Golay. 1964. Smoothing and Differentiation of Data by Simplified Least Squares Procedures. Anal. Chem. 36(8): 1627-1639. doi: 10.1021/ac60214a047. * Shrestha, R., L. Matteis, M. Skofic, A. Portugal, G. McLaren, et al.2012. Bridging the phenotypic and genetic data useful for integrated breeding through a data annotation using the Crop Ontology developed by the crop communities of practice. Front. Physiol. 3 AUG(August): 1-10. doi: 10.3389/fphys.2012.00326. * Vapnik, V.N. 2000. The Nature of Statistical Learning Theory. Springer New York, New York, NY. * Wold, S., A. Ruhe, H. Wold, and W.J. Dunn, III. 1984. The Collinearity Problem in Linear Regression. The Partial Least Squares (PLS) Approach to Generalized Inverses. SIAM J. Sci. Stat. 
Comput. 5(3): 735-743. doi: 10.1137/0905052. * Wold, S., M. Sjöström, and L. Eriksson. 2001. PLS-regression: a basic tool of chemometrics. Chemom. Intell. Lab. Syst. 58(2): 109-130. doi: 10.1016/S0169-7439(01)00155-1. 25.13 General Mixed Model Tool The general mixed model tool is available at /tools/mixedmodels and a link is provided from the Analyze menu. To use the mixed model tool, first create a dataset using the Wizard containing the data that you would like to analyze. Select the Mixed Model tool from the Analyze menu. You are presented with a workflow. On the first step of the workflow, select the dataset that you wish to analyze, click on Choose dataset to continue. The second part of the workflow presents you with the traits in the dataset; you can select one or more traits from the lists using the select buttons. If you selected one trait, a bar graph of the trait distribution will be shown. Click the Next step button to move to the next screen. On the model build screen, all the factors are displayed that are contained within the dataset. The factors are presented as a list of blue buttons that can be dragged using the mouse to areas on the screen which build a mixed model equation. The areas correspond to fixed factors, random factors, and optionally to more complex factors, such as fixed factors with interaction and fixed factors with variable slope/intercepts. Drag the available factors to the corresponding area. To calculate BLUPs for germplasm, drag the germplasmName button to the Random factors area. To calculate BLUEs, drag it to the Fixed factors area. The factors need to have different levels contained within them, for example, if there is only one trial in the dataset, it cannot be used as one of the factors. Click on Run analysis and go to next step to run the mixed model and display the results. The result view contains two tabs, one with the raw data, either BLUPs or BLUEs, and the other the adjusted means from the raw data. 
The results can be stored in the database as an analysis, by clicking the button provided on the top of the data. 25.14 Genomic Prediction of Cross Performance (GCPC) The GCPC tool is available at /tools/gcpc and a link is provided from the Analyze menu. The GCPC tool implements genomic prediction with additive and directional dominance in the linear mixed model to predict cross performance. Before using the tool, first create a dataset using the Wizard containing the data that you would like to analyze. (The dataset should have genotyping_protocols). Second, create Selection Indices for your traits using Selection Index in Analyze Menu. To use the tool, select the GCPC tool from the Analyze menu. Then, select the dataset with genotyping_protocols that you wish to analyze, click on Proceed to Factor Selection to load available factors that can be included in the model. Select the factors you wish to include in the model either as Fixed or Random. Click None for factors that you don't want to include in the model. Note that the germplasmName is factored as Random by default. The next step is to select the selection index for your traits on the dropdown menu. Once you are through, click Run GCPC to run the model. The output will be presented in the form of a table with ID, Parent1, Parent2 and their cross prediction merit organized in descending order. The results will also have sex information based on whether the dataset has plant sexes available in the database. "],["404.html", "Page not found", " Page not found The page you requested cannot be found (perhaps it was moved or renamed). You may want to try searching to find the page's new location, or use the table of contents to find the page you are looking for. "]] diff --git a/docs/searching-the-database.html b/docs/searching-the-database.html index d2e8cbed3c..14f060c3b8 100644 --- a/docs/searching-the-database.html +++ b/docs/searching-the-database.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/using-fieldbook-app.html b/docs/using-fieldbook-app.html index 31ce211664..42bcc229af 100644 --- a/docs/using-fieldbook-app.html +++ b/docs/using-fieldbook-app.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/docs/using-the-label-designer.html b/docs/using-the-label-designer.html index 5333bf135f..d47392c05e 100644 --- a/docs/using-the-label-designer.html +++ b/docs/using-the-label-designer.html @@ -23,7 +23,7 @@ - + @@ -184,7 +184,8 @@
  • 10.2.9 Adding Plant Entries To Your Trial
  • 10.2.10 Adding Tissue Sample Entries To Your Trial
  • 10.2.11 Uploading GPS Coordinates For Plots
  • -
  • 10.2.12 Uploading Additional Files To Trial
  • +
  • 10.2.12 Repetitive Measurements Section
  • +
  • 10.2.13 Uploading Additional Files To Trial
  • 10.3 Updating Trial Data
  • 10.4 Deleting Trial Data
  • diff --git a/js/source/legacy/d3/graphers/lineGraphRepetitiveValues.js b/js/source/legacy/d3/graphers/lineGraphRepetitiveValues.js new file mode 100644 index 0000000000..915aa133ac --- /dev/null +++ b/js/source/legacy/d3/graphers/lineGraphRepetitiveValues.js @@ -0,0 +1,187 @@ +(function(exports){ + //check, whether d3v4 is loaded !! + var d3 = typeof d3v4 !== 'undefined' ? d3v4 : d3; + if (typeof d3 === 'undefined') { + throw new Error ("D3 is not loaded"); + } + + /* + + * Draw a line graph for repetitive trait values - for both the small and large graph !!. + @param {Array} data - an array of object, which contains both the 'value' and 'date' properties !! + @param {HTMLElement|String} container - the target where the graph should be drawn !! + @param {Object} [layout] - (OPTIONAL) ab object that that holds all the dimensions properties including length, width, margin etc ... of the line graph !! + @param {String} trait_name - the trait_name is used for the y-axis label !! + @param {string} label_observation_unit_name - since, we have the repetitive values for unique obs_unit_name, therefore, using as the title of the graph !! + @param {Object} [options] - (OPTIONAL) this is an object holds the properites related to graphs details - x- and y-axis labels, title, and data-points !! 
+ + */ + + exports.drawLineGraph = function(data, container, layout, trait_name, label_observation_unit_name, options) { + // set the default layout for the large graph + layout = layout || { + "width": 800, + "height": 400, + "margin": { "top": 20, "right": 30, "bottom": 100, "left": 80 } + }; + + options = options || { + showXAxis: true, + showYAxis: true, + showDots: true, + showTitle: true + }; + + var margin = layout.margin; + var width = layout.width - margin.left - margin.right; + var height = layout.height - margin.top - margin.bottom; + + // Clear any existing content + d3.select(container).html(''); + + // Create SVG container + var svg = d3.select(container) + .append("svg") + .attr("width", width + margin.left + margin.right) + .attr("height", height + margin.top + margin.bottom) + .append("g") + .attr("transform", "translate(" + margin.left + "," + margin.top + ")"); + + // Parse the date strings into Date objects + data.forEach(d => { + d.date = new Date(d.date); + d.value = +d.value; + }); + + // Set the x-axis scale + var xExtent = d3.extent(data, function(d) { return d.date; }); + var xPadding = 0.05 * (xExtent[1] - xExtent[0]); + var x = d3.scaleTime() + .domain([ + d3.timeMillisecond.offset(xExtent[0], -xPadding), + d3.timeMillisecond.offset(xExtent[1], xPadding) + ]) + .range([0, width]); + + // Set the y-axis scale + var y = d3.scaleLinear() + .domain([d3.min(data, function(d) { return d.value; }), d3.max(data, function(d) {return d.value; })]) + .nice() + .range([height, 0]); + + // add the x-axis + if (options.showXAxis) { + svg.append("g") + .attr("transform", "translate(0," + height + ")") + .call(d3.axisBottom(x) + .tickFormat(d3.timeFormat("%Y-%m-%d"))) + .selectAll("text") + .attr("transform", "rotate(-45)") //the labels will be at 45 degree angle because of the space !! 
+ .style("text-anchor", "end"); + } + + // Add y-axis + if (options.showYAxis) { + svg.append("g") + .call(d3.axisLeft(y)); + } + + var line = d3.line() + .x(function(d) {return x(d.date); }) + .y(function(d) {return y(d.value); }) + .curve(d3.curveMonotoneX); //this will make the line + + // Draw line path + svg.append("path") + .datum(data) + .attr("fill", "none") + .attr("stroke", "steelblue") + .attr("stroke-width", 2) + .attr("d", line); + + var tooltip = d3.select("body").append("div") + .attr("class", "tooltip") + .style("position", "absolute") + .style("background-color", "#fff") + .style("border", "1px solid #ccc") + .style("padding", "5px") + .style("z-index", 9999) + .style("opacity", 0); + if (options.showDots) { + var dotsGroup = svg.selectAll("g.dot-group") + .data(data) + .enter() + .append("g") + .attr("class", "dot-group"); + + // The visible circle (red, radius=4) + dotsGroup + .append("circle") + .attr("cx", function(d) { return x(d.date); }) + .attr("cy", function(d) { return y(d.value); }) + .attr("r", 4) + .attr("fill", "red"); + + // The invisible circle (bigger radius) to capture hover events + dotsGroup + .append("circle") + .attr("cx", function(d) { return x(d.date); }) + .attr("cy", function(d) { return y(d.value); }) + .attr("r", 10) // bigger radius + .style("fill", "none") + .style("pointer-events", "all") // ensure it can receive events + .on("mouseover", function(d) { + var e = d3.event; + tooltip + .html( + "Value: " + d.value + "
    " + + "Date: " + d3.timeFormat("%Y-%m-%d")(d.date) + ) + .style("left", (e.pageX + 10) + "px") + .style("top", (e.pageY - 25) + "px") + .style("opacity", 1); + }) + .on("mousemove", function() { + var e = d3.event; + tooltip + .style("left", (e.pageX + 10) + "px") + .style("top", (e.pageY - 25) + "px"); + }) + .on("mouseout", function() { + tooltip.style("opacity", 0); + }); + } + + // add the attributes to the x- and y-axis + if (options.showXAxis) { + svg.append("text") + .attr("x", width / 2) + .attr("y", height + margin.bottom - 10) + .attr("text-anchor", "middle") + .style("font-size", "12px") + .text("Collect Date"); + } + + if (options.showYAxis) { + svg.append("text") + .attr("transform", "rotate(-90)") + .attr("x", -height / 2) + .attr("y", -margin.left + 20) + .attr("text-anchor", "middle") + .style("font-size", "12px") + .text(trait_name); + } + + if(options.showTitle) { + svg.append("text") + .attr("x", width / 2) + .attr("y", 0 - (margin.top / 4)) + .attr("text-anchor", "middle") + .style("font-size", "16px") + //.style("text-decoration", "underline") + .text(label_observation_unit_name); + } + + }; + +}(typeof exports === 'undefined' ? 
this.lineGraphRepetitiveValues = {} : exports)); \ No newline at end of file diff --git a/js/source/modules/wizard-downloads.js b/js/source/modules/wizard-downloads.js index 388029657d..ef431b7b79 100644 --- a/js/source/modules/wizard-downloads.js +++ b/js/source/modules/wizard-downloads.js @@ -197,8 +197,14 @@ export function WizardDownloads(main_id,wizard){ var outliers = d3.selectAll('.wizard-download-phenotypes-outliers').property('checked')?1:0; var names = JSON.stringify(d3.select(".wizard-download-phenotypes-name").node().value.split(",")); var min = d3.select(".wizard-download-phenotypes-min").node().value; - var max = d3.select(".wizard-download-phenotypes-max").node().value; + var max = d3.select(".wizard-download-phenotypes-max").node().value; + var repetitive_measurements = d3.select(".wizard-download-repetitive-measurements-type").node().value; + var phenotype_start_date = d3.select(".wizard-download-start-date").node().value; + var phenotype_end_date = d3.select(".wizard-download-end-date").node().value; + //alert('start date = '+phenotype_start_date); + //alert('repetitive type = '+repetitive_measurements); + var url = document.location.origin+'/breeders/trials/phenotype/download'; openWindowWithPost(url, { trial_list: trial_ids, @@ -220,6 +226,9 @@ export function WizardDownloads(main_id,wizard){ include_row_and_column_numbers: 1, exclude_phenotype_outlier: outliers, include_pedigree_parents: 0, + repetitive_measurements: repetitive_measurements, + phenotype_start_date: phenotype_start_date, + phenotype_end_date: phenotype_end_date }); }); }); diff --git a/lib/CXGN/BrAPI/Pagination.pm b/lib/CXGN/BrAPI/Pagination.pm index 31082342a0..5bdaf28986 100644 --- a/lib/CXGN/BrAPI/Pagination.pm +++ b/lib/CXGN/BrAPI/Pagination.pm @@ -6,7 +6,7 @@ use Data::Dumper; sub pagination_response { my $self = shift; my $data_count = shift; - my $page_size = shift; + my $page_size = shift || 10; my $page = shift; $page_size += 0; # convert from string to int diff --git 
a/lib/CXGN/BrAPI/v2/Observations.pm b/lib/CXGN/BrAPI/v2/Observations.pm index cca5aebd5a..28862b7e06 100644 --- a/lib/CXGN/BrAPI/v2/Observations.pm +++ b/lib/CXGN/BrAPI/v2/Observations.pm @@ -10,6 +10,7 @@ use CXGN::BrAPI::Pagination; use CXGN::BrAPI::FileRequest; use CXGN::Phenotypes::StorePhenotypes; use CXGN::TimeUtils; +use DateTime; use utf8; use JSON; @@ -248,8 +249,9 @@ sub _search { my $brapi_trial_ids_arrayref = $params->{trialDbId} || ($params->{trialDbIds} || ()); my $accession_ids_arrayref = $params->{germplasmDbId} || ($params->{germplasmDbIds} || ()); my $program_ids_arrayref = $params->{programDbId} || ($params->{programDbIds} || ()); - my $start_time = $params->{observationTimeStampRangeStart}->[0] || undef; - my $end_time = $params->{observationTimeStampRangeEnd}->[0] || undef; + my $start_date = $params->{observationTimeStampRangeStart}->[0] || undef; + my $end_date = $params->{observationTimeStampRangeEnd}->[0] || undef; + my $repetitive_measurements_type = $params->{repetitiveMeasurements_type} || 'average'; #use default to average my $observation_unit_db_id = $params->{observationUnitDbId} || ($params->{observationUnitDbIds} || ()); # observationUnitLevelName # observationUnitLevelOrder @@ -279,7 +281,11 @@ sub _search { limit=>$limit, offset=>$offset, order_by=>"plot_number", - include_timestamp=>1 + #include_timestamp=>1, + start_date => $start_date, + end_date => $end_date, + repetitive_measurements => $repetitive_measurements_type, + include_dateless_items => 1 } ); my ($data, $unique_traits) = $phenotypes_search->search(); @@ -299,8 +305,23 @@ sub _search { seasonDbId => $_->{year} ); my $obs_timestamp = $_->{collect_date} ? 
$_->{collect_date} : $_->{timestamp}; - if ( $start_time && $obs_timestamp < $start_time ) { next; } #skip observations before date range - if ( $end_time && $obs_timestamp > $end_time ) { next; } #skip observations after date range + #since, the collect_date as stored as the timestamp in the database, we need to convert it to the correct format + if ($obs_timestamp) { + my ($obs_date, $obs_time) = split / /, $obs_timestamp; + my ($obs_year, $obs_month, $obs_day) = split /-/, $obs_date; + my ($start_year, $start_month, $start_day) = split /\-/, $start_date; + my ($end_year, $end_month, $end_day) = split /\-/, $end_date; + + if ($obs_year && $obs_month && $obs_day && $start_year && $start_month && $start_day && $end_year && $end_month && $end_day) { + my $obs_date_obj = DateTime->new({ year => $obs_year, month => $obs_month, day => $obs_day }); + my $start_date_obj = DateTime->new({ year => $start_year, month => $start_month, day => $start_day }); + my $end_date_obj = DateTime->new({ year => $end_year, month => $end_month, day => $end_day }); + + + if ( $start_date && (DateTime->compare($obs_date_obj, $start_date_obj) == -1 ) ) { next; } #skip observations before date range + if ( $end_date && (DateTime->compare($obs_date_obj, $end_date_obj) == 1 ) ) { next; } #skip observations after date range + } + } if ($counter >= $start_index && $counter <= $end_index) { push @data_window, { @@ -326,6 +347,7 @@ sub _search { } } + # print STDERR "Values of all the params: " . Dumper(\@data_window) . "\n"; return (\@data_window,$counter); } diff --git a/lib/CXGN/Dataset.pm b/lib/CXGN/Dataset.pm index b46f0010ad..49604d39c9 100644 --- a/lib/CXGN/Dataset.pm +++ b/lib/CXGN/Dataset.pm @@ -709,7 +709,7 @@ sub retrieve_phenotypes { push @trait_ids, $_->[0]; } - my $dataset_exluded_outliers = $self->exclude_dataset_outliers() ? $self->outliers() : undef; + my $dataset_excluded_outliers = $self->exclude_dataset_outliers() ? 
$self->outliers() : undef; my $phenotypes_search = CXGN::Phenotypes::PhenotypeMatrix->new( search_type=>'MaterializedViewTable', @@ -720,7 +720,7 @@ sub retrieve_phenotypes { accession_list=>\@accession_ids, exclude_phenotype_outlier=>$self->exclude_phenotype_outlier, include_phenotype_primary_key=>$self->include_phenotype_primary_key, - dataset_exluded_outliers=>$dataset_exluded_outliers + dataset_excluded_outliers=>$dataset_excluded_outliers ); my @data = $phenotypes_search->get_phenotype_matrix(); return \@data; diff --git a/lib/CXGN/Fieldbook/TraitProps.pm b/lib/CXGN/Fieldbook/TraitProps.pm index d4c56fdd68..4df8927fba 100644 --- a/lib/CXGN/Fieldbook/TraitProps.pm +++ b/lib/CXGN/Fieldbook/TraitProps.pm @@ -64,6 +64,7 @@ sub _get_cvterms { trait_maximum trait_details trait_categories + trait_repeat_type ); my $cv = $chado_schema->resultset("Cv::Cv") diff --git a/lib/CXGN/Phenotype.pm b/lib/CXGN/Phenotype.pm new file mode 100644 index 0000000000..ebc2dfad82 --- /dev/null +++ b/lib/CXGN/Phenotype.pm @@ -0,0 +1,204 @@ +package CXGN::Phenotype; + +use Moose; +use Data::Dumper; +use Bio::Chado::Schema; +use JSON qw | encode_json decode_json |; + +has 'schema' => ( + isa => 'Ref', + is => 'rw', + required => 1, + ); + +has 'phenotype_id' => ( + isa => 'Int|Undef', + is => 'rw', + ); + +has 'cvterm_id' => ( + isa => 'Int|Undef', + is => 'rw', + ); + +has 'cvterm_name' => ( + isa => 'Str', + is => 'rw' +); + +has 'value' => ( + isa => 'Str|Undef', + is => 'rw', + ); + +has 'stock_id' => ( + isa => 'Int', + is => 'rw', + ); + +has 'nd_experiment_id' => ( + isa => 'Str', + is => 'rw', + ); + +has 'operator' => ( + isa => 'Str', + is => 'rw', + ); + +has 'collect_date' => ( + isa => 'Str|Undef', + is => 'rw', + ); + +has 'image_id' => ( + isa => 'Int|Undef', + is => 'rw', + ); + +has 'existing_trait_value' => ( + isa => 'Str|Undef', + is => 'rw', + ); + +has 'unique_time' => ( + isa => 'Str|Undef', + is => 'rw', + ); + +has 'uniquename' => ( + isa => 'Str', + is => 'rw', + 
); + +has 'experiment' => ( + isa => 'Bio::Chado::Schema::Result::NaturalDiversity::NdExperiment', + is => 'rw', + ); + +#has 'plot_trait_uniquename' => ( +# isa => 'Str|Undef', +# is => 'rw', +# ); + +sub store { + my $self = shift; + print STDERR "CXGN::Phenotype store \n"; + + my %experiment_ids = (); + my %nd_experiment_md_images; + my @overwritten_values; + + if (! $self->cvterm_id()) { + my $row = $self->schema->resultset("Cv::Cvterm")->find( { name => $self->cvterm_name() }); + if ($row) { + $self->cvterm_id($row->cvterm_id); + } + else { + die "The cvterm ".$self->cvterm_name()." does not exist. Exiting.\n"; + } + } + + if ($self->phenotype_id) { ### UPDATE + my $phenotype = $self->schema->resultset('Phenotype::Phenotype')-> + find( { phenotype_id => $self->phenotype_id() }); + ## should check that unit and variable (also checked here) are conserved in parse step, + ## if not reject before store + ## should also update operator in nd_experimentprops + + $phenotype->update({ + value => $self->value(), + cvalue_id => $self->cvterm_id(), + observable_id => $self->cvterm_id(), + uniquename => $self->uniquename(), + collect_date => $self->collect_date(), + operator => $self->operator(), + }); + + # $self->handle_timestamp($timestamp, $observation); + # $self->handle_operator($operator, $observation); + + my $q = "SELECT phenotype_id, nd_experiment_id, file_id + FROM phenotype + JOIN nd_experiment_phenotype using(phenotype_id) + JOIN nd_experiment_stock using(nd_experiment_id) + LEFT JOIN phenome.nd_experiment_md_files using(nd_experiment_id) + JOIN stock using(stock_id) + WHERE stock.stock_id=? 
+ AND phenotype.cvalue_id=?"; + + my $h = $self->schema->storage->dbh()->prepare($q); + $h->execute($self->stock_id, $self->cvterm_id); + + while (my ($phenotype_id, $nd_experiment_id, $file_id) = $h->fetchrow_array()) { + push @overwritten_values, [ $file_id, $phenotype_id, $nd_experiment_id ]; + $experiment_ids{$nd_experiment_id} = 1; + if ($self->image_id) { + $nd_experiment_md_images{$nd_experiment_id} = $self->image_id; + } + } + return { success => 1, overwritten_values => \@overwritten_values, experiment_ids => \%experiment_ids, nd_experiment_md_images => \%nd_experiment_md_images }; + }else { # INSERT + my $phenotype_row = $self->schema->resultset('Phenotype::Phenotype')->create({ + cvalue_id => $self->cvterm_id(), + observable_id => $self->cvterm_id(), + value => $self->value(), + uniquename => $self->uniquename(), + collect_date => $self->collect_date(), + operator => $self->operator(), + }); + + #$self->handle_timestamp($timestamp, $phenotype->phenotype_id); + #$self->handle_operator($operator, $phenotype->phenotype_id); + + $self->experiment->create_related('nd_experiment_phenotypes',{ + phenotype_id => $phenotype_row->phenotype_id }); + $experiment_ids{$self->experiment->nd_experiment_id()} = 1; + if ($self->image_id) { + $nd_experiment_md_images{$self->experiment->nd_experiment_id()} = $self->image_id; + } + $self->phenotype_id($phenotype_row->phenotype_id()); + } + return { success => 1 }; +} + +sub store_external_references { + my $self = shift; + print STDERR "the CXGN::Phenotype store_external_references function\n"; + my $external_references = shift; + + my $external_references_type_id = SGN::Model::Cvterm->get_cvterm_row($self->schema(), 'phenotype_external_references', 'phenotype_property')->cvterm_id(); + + my $external_references_stored; + my $phenotype_external_references = $self->schema->resultset("Phenotype::Phenotypeprop")->find_or_create({ + phenotype_id => $self->phenotype_id, + type_id => $external_references_type_id, + }); + + 
$phenotype_external_references = $phenotype_external_references->update({ + value => encode_json $external_references, + }); + $external_references_stored = $phenotype_external_references->value ? decode_json $phenotype_external_references->value : undef; + + return $external_references_stored; +} + +sub store_additional_info { + my $self = shift; + my $additional_info = shift; + + my $phenotype_additional_info_type_id = SGN::Model::Cvterm->get_cvterm_row($self->schema(), 'phenotype_additional_info', 'phenotype_property')->cvterm_id(); + + my $pheno_additional_info = $self->schema()->resultset("Phenotype::Phenotypeprop")->find_or_create({ + phenotype_id => $self->phenotype_id, + type_id => $phenotype_additional_info_type_id, + }); + $pheno_additional_info = $pheno_additional_info->update({ + value => encode_json $additional_info, + }); + + my $additional_info_stored = $pheno_additional_info->value ? decode_json $pheno_additional_info->value : undef; + return $additional_info_stored; +} + +1; \ No newline at end of file diff --git a/lib/CXGN/Phenotypes/ParseUpload/Plugin/PhenotypeSpreadsheet.pm b/lib/CXGN/Phenotypes/ParseUpload/Plugin/PhenotypeSpreadsheet.pm index bad0d8226e..92130b1ea7 100644 --- a/lib/CXGN/Phenotypes/ParseUpload/Plugin/PhenotypeSpreadsheet.pm +++ b/lib/CXGN/Phenotypes/ParseUpload/Plugin/PhenotypeSpreadsheet.pm @@ -369,7 +369,8 @@ sub parse { if ( defined($trait_value) && defined($timestamp) ) { if ($trait_value ne '.'){ - $data{$plot_name}->{$trait_name} = [$trait_value, $timestamp]; + # for multiple values or time series, need to store all the values + push @{$data{$plot_name}->{$trait_name} }, [$trait_value, $timestamp]; } } } diff --git a/lib/CXGN/Phenotypes/ParseUpload/Plugin/PhenotypeSpreadsheetSimple.pm b/lib/CXGN/Phenotypes/ParseUpload/Plugin/PhenotypeSpreadsheetSimple.pm index 33435c41cb..64e614a56c 100644 --- a/lib/CXGN/Phenotypes/ParseUpload/Plugin/PhenotypeSpreadsheetSimple.pm +++ 
b/lib/CXGN/Phenotypes/ParseUpload/Plugin/PhenotypeSpreadsheetSimple.pm @@ -204,7 +204,8 @@ sub parse { if ( defined($trait_value) && defined($timestamp) ) { if ($trait_value ne '.'){ - $data{$observationunit_name}->{$trait_name} = [$trait_value, $timestamp]; + ### for multiple values or time series, need to store all the values + push @{$data{$observationunit_name}->{$trait_name} }, [$trait_value, $timestamp]; } } } diff --git a/lib/CXGN/Phenotypes/ParseUpload/Plugin/PhenotypeSpreadsheetSimpleGeneric.pm b/lib/CXGN/Phenotypes/ParseUpload/Plugin/PhenotypeSpreadsheetSimpleGeneric.pm index aefff21ade..91eeff38c6 100644 --- a/lib/CXGN/Phenotypes/ParseUpload/Plugin/PhenotypeSpreadsheetSimpleGeneric.pm +++ b/lib/CXGN/Phenotypes/ParseUpload/Plugin/PhenotypeSpreadsheetSimpleGeneric.pm @@ -124,7 +124,7 @@ sub parse { if ( defined($trait_value) && defined($timestamp) ) { if ($trait_value ne '.') { - $data{$observationunit_name}->{$trait_name} = [$trait_value, $timestamp]; + push @{$data{$observationunit_name}->{$trait_name}}, [$trait_value, $timestamp]; } } } diff --git a/lib/CXGN/Phenotypes/PhenotypeMatrix.pm b/lib/CXGN/Phenotypes/PhenotypeMatrix.pm index e1e3abb909..25dab8fdd9 100644 --- a/lib/CXGN/Phenotypes/PhenotypeMatrix.pm +++ b/lib/CXGN/Phenotypes/PhenotypeMatrix.pm @@ -22,7 +22,7 @@ my $phenotypes_search = CXGN::Phenotypes::PhenotypeMatrix->new( include_timestamp=>$include_timestamp, include_pedigree_parents=>$include_pedigree_parents, exclude_phenotype_outlier=>0, - dataset_exluded_outliers=>$dataset_exluded_outliers, + dataset_excluded_outliers=>$dataset_excluded_outliers, trait_contains=>$trait_contains, phenotype_min_value=>$phenotype_min_value, phenotype_max_value=>$phenotype_max_value, @@ -144,7 +144,7 @@ has 'exclude_phenotype_outlier' => ( default => 0 ); -has 'dataset_exluded_outliers' => ( +has 'dataset_excluded_outliers' => ( isa => 'ArrayRef[Int]|Undef', is => 'rw', ); @@ -192,14 +192,33 @@ has 'offset' => ( is => 'rw' ); +has 'repetitive_measurements' => 
( + isa => 'Str', + is => 'rw', + default => sub { return 'average'; }, # can be first, last, average, all_values_single_line, sum, all_values_multiple_line + ); + +has 'single_measurements' => ( + isa => 'Str|Undef', + is => 'rw', + default => 'last', # can be first or last + ); + +has 'trait_repeat_types' => ( # returns the repeat type for every trait keyed by cvterm_id + isa => 'HashRef|Undef', + is => 'rw', + default => sub { return {} }, +); + sub get_phenotype_matrix { my $self = shift; my $include_pedigree_parents = $self->include_pedigree_parents(); my $include_timestamp = $self->include_timestamp; my $include_phenotype_primary_key = $self->include_phenotype_primary_key; + $self->trait_repeat_types( $self->retrieve_trait_repeat_types() ); print STDERR "GET PHENOMATRIX ".$self->search_type."\n"; - + my $phenotypes_search = CXGN::Phenotypes::SearchFactory->instantiate( $self->search_type, { @@ -217,13 +236,13 @@ sub get_phenotype_matrix { subplot_list=>$self->subplot_list, include_timestamp=>$include_timestamp, exclude_phenotype_outlier=>$self->exclude_phenotype_outlier, - dataset_exluded_outliers=>$self->dataset_exluded_outliers, + dataset_excluded_outliers=>$self->dataset_excluded_outliers, trait_contains=>$self->trait_contains, phenotype_min_value=>$self->phenotype_min_value, phenotype_max_value=>$self->phenotype_max_value, - start_date => $self->start_date(), - end_date => $self->end_date(), - include_dateless_items => $self->include_dateless_items(), + start_date => $self->start_date(), + end_date => $self->end_date(), + include_dateless_items => $self->include_dateless_items(), limit=>$self->limit, offset=>$self->offset } @@ -270,6 +289,8 @@ sub get_phenotype_matrix { push @info, \@line; + #print STDERR "DATA = ".Dumper($data); + foreach my $obs_unit (@$data){ my $entry_type = $obs_unit->{obsunit_is_a_control} ? 
'check' : 'test'; my $synonyms = $obs_unit->{germplasm_synonyms}; @@ -295,43 +316,36 @@ sub get_phenotype_matrix { push @line, ($parents->{'mother'}, $parents->{'mother_id'}, $parents->{'father'}, $parents->{'father_id'}); } - my $observations = $obs_unit->{observations}; -# print STDERR "OBSERVATIONS =".Dumper($observations)."\n"; - my $include_timestamp = $self->include_timestamp; - my %trait_observations; - my %phenotype_ids; - my $dataset_exluded_outliers_ref = $self->dataset_exluded_outliers; - foreach my $observation (@$observations){ - my $collect_date = $observation->{collect_date}; - my $timestamp = $observation->{timestamp}; - - if ($include_timestamp && $timestamp) { - $trait_observations{$observation->{trait_name}} = "$observation->{value},$timestamp"; - } - elsif ($include_timestamp && $collect_date) { - $trait_observations{$observation->{trait_name}} = "$observation->{value},$collect_date"; - } - else { - $trait_observations{$observation->{trait_name}} = $observation->{value}; - } - - # dataset outliers will be empty fields if are in @$dataset_exluded_outliers_ref list of pheno_id outliers - if(grep {$_ == $observation->{'phenotype_id'}} @$dataset_exluded_outliers_ref) { - $trait_observations{$observation->{trait_name}} = ''; # empty field for outlier NA - } - } - - if ($include_phenotype_primary_key) { - foreach my $observation (@$observations) { - $phenotype_ids{$observation->{trait_name}} = $observation->{phenotype_id}; - } - } - foreach my $trait (@sorted_traits) { - push @line, $trait_observations{$trait}; - if ($include_phenotype_primary_key) { - push @line, $phenotype_ids{$trait}; - } - } + #print STDERR "OBS UNIT = ".Dumper($obs_unit); + my $observations = $obs_unit->{observations}; + + #print STDERR "OBSERVATIONS BEFORE FORMAT: ".Dumper($observations); + + # if (scalar(@$observations) > 0) { + + + my %phenotype_ids; + my %trait_observations = (); + if (@$observations > 0) { + %trait_observations = $self->format_observations($observations); + 
} + + #print STDERR "FORMATTED OBSERVATIONS =".Dumper(\%trait_observations)."\n"; + + if ($include_phenotype_primary_key) { + foreach my $observation (@$observations) { + $phenotype_ids{$observation->{trait_name}} = $observation->{phenotype_id}; + } + } + foreach my $trait (@sorted_traits) { + + push @line, $trait_observations{$trait}; + + if ($include_phenotype_primary_key) { + push @line, $phenotype_ids{$trait}; + } + } + push @line, $obs_unit->{notes}; # add treatment values to each obsunit line @@ -345,9 +359,11 @@ sub get_phenotype_matrix { push @info, \@line; } - } else { + } + else { ### NATIVE ??!! + $data = $phenotypes_search->search(); - #print STDERR "DOWNLOAD DATA =".Dumper($data)."\n"; + #print STDERR "the download data structure =". Dumper($data)."\n"; my %obsunit_data; my %traits; @@ -355,8 +371,25 @@ sub get_phenotype_matrix { print STDERR "No of lines retrieved: ".scalar(@$data)."\n"; print STDERR "Construct Pheno Matrix Start:".localtime."\n"; my @unique_obsunit_list = (); - my %seen_obsunits; - + my %seen_obsunits; + + foreach my $d (@$data) { + my $value = ""; + + my $timestamp = $d->{timestamp}; + if ($timestamp) { $timestamp =~ s/^\s+|\s+$//g; } + + if ($include_timestamp && $timestamp) { + $value = "$d->{phenotype_value},$d->{timestamp}"; + # print STDERR "value with phenotypes and timestamp: $value\n"; + } + else { + $value = $d->{phenotype_value}; + # print STDERR "value only with phenotypes: $value\n"; + } + push @{ $obsunit_data{$d->{obsunit_stock_id}}->{$d->{trait_name} } }, $value; + } + foreach my $d (@$data) { my $cvterm = $d->{trait_name}; if ($cvterm){ @@ -369,11 +402,107 @@ sub get_phenotype_matrix { my $timestamp_value = $d->{timestamp}; my $value = $d->{phenotype_value}; #my $cvterm = $trait."|".$cvterm_accession; - if ($include_timestamp && $timestamp_value) { - $obsunit_data{$obsunit_id}->{$cvterm} = "$value,$timestamp_value"; - } else { - $obsunit_data{$obsunit_id}->{$cvterm} = $value; - } + # if ($include_timestamp && 
$timestamp_value) { + # $obsunit_data{$obsunit_id}->{$cvterm} = "$value,$timestamp_value"; + # } else { + # $obsunit_data{$obsunit_id}->{$cvterm} = $value; + # } + + if (ref($obsunit_data{$obsunit_id}->{$cvterm}) eq "ARRAY") { + # print STDERR "the the obsunit_data : " . Dumper($obsunit_data{$obsunit_id}->{$cvterm}); + my @sorted_measurements = @{$obsunit_data{$obsunit_id}->{$cvterm}}; + #sort the measurements by timestamp + @sorted_measurements = sort { + my ($value_a, $timestamp_a) = split(',', $a); + my ($value_b, $timestamp_b) = split(',', $b); + ($timestamp_a || '') cmp ($timestamp_b || '') + } @sorted_measurements; + + if ($self->repetitive_measurements() eq "first") { + # $obsunit_data{$obsunit_id}->{$cvterm} = shift(@{$obsunit_data{$obsunit_id}->{$cvterm}}); + $obsunit_data{$obsunit_id}->{$cvterm} = $sorted_measurements[0]; + } + + if ($self->repetitive_measurements() eq "last") { + # $obsunit_data{$obsunit_id}->{$cvterm} = pop(@{$obsunit_data{$obsunit_id}->{$cvterm}}); + $obsunit_data{$obsunit_id}->{$cvterm} = $sorted_measurements[-1]; + } + + if ($self->repetitive_measurements() eq "average") { + my $count = 0; + my $sum = 0; + foreach my $v (@{ $obsunit_data{$obsunit_id}->{$cvterm}}) { + # print STDERR "the value of v in the average = $v\n"; + my ($value, $timestamp); + if (defined($v)) { + ($value, $timestamp) = split(',', $v); + } + #if timestamp is undefined, $v is the last measurement + $value = $v unless defined $timestamp; + if (defined($value)) { + $sum += $value; + $count++; + } + } + if($count >0) { + my $averaged_values = $sum/$count; + # the timestamp for the average values, will be the latest (or the last measurement, timestamp). Therefore, am retreving the timestamp of the last measurement !! + my $last_measurement = $sorted_measurements[-1]; + # since, the values are stored with the timestamp, need to split them to get the timestamp of the last_measurment !! 
+ my ($last_value, $last_timestamp) = split(',', $last_measurement); + $last_value = $last_measurement unless defined $last_timestamp; + # conditionally include, if the timestamp !! + if ($include_timestamp && defined $last_timestamp) { + $obsunit_data{$obsunit_id}->{$cvterm} = "$averaged_values, $last_timestamp"; + } else { + $obsunit_data{$obsunit_id}->{$cvterm} = $averaged_values; + } + } + else { + $obsunit_data{$obsunit_id}->{$cvterm} = undef; + } + + } + + if ($self->repetitive_measurements() eq "sum") { + my $sum_all_values = 0; + foreach my $v (@{ $obsunit_data{$obsunit_id}->{$cvterm}}) { + # print STDERR "the value of v in the sum = $v\n"; + my ($value, $timestamp); + if (defined($v)) { + ($value, $timestamp) = split(',', $v); + } + if (defined($value)) { + $sum_all_values += $value; + } + } + # It's same as in the average above, retrieve the last_measurement timestamp !! + my $last_measurement = $sorted_measurements[-1]; + + my ($last_value, $last_timestamp) = (undef, undef); + + if ($last_measurement) { + ($last_value, $last_timestamp) = split(',', $last_measurement); + } + + #$last_value = $last_measurement unless defined $last_timestamp; + + # Store the sum of all values, with the last_measurement timestamp !! 
+ # Conditionally include the timestamp + if ($include_timestamp && defined $last_timestamp) { + $obsunit_data{$obsunit_id}->{$cvterm} = "$sum_all_values, $last_timestamp"; + } else { + $obsunit_data{$obsunit_id}->{$cvterm} = $sum_all_values; + } + } + + if ($self->repetitive_measurements() eq "all_values_single_line") { + no warnings; + $obsunit_data{$obsunit_id}->{$cvterm} = join("|",@{$obsunit_data{$obsunit_id}->{$cvterm}}); + # print STDERR "ALL VALUES SINGLE LINE = ".Dumper $obsunit_data{$obsunit_id}->{$cvterm}; + } + } + $obsunit_data{$obsunit_id}->{'notes'} = $d->{notes}; my $synonyms = $d->{synonyms}; @@ -421,8 +550,8 @@ sub get_phenotype_matrix { $traits{$cvterm}++; } } - #print STDERR Dumper \%plot_data; - #print STDERR Dumper \%traits; + #print STDERR "PLOT DATA = ".Dumper \%plot_data; + #print STDERR "TRAITS = ".Dumper \%traits; # retrieve treatments my $project_object = CXGN::BreedersToolbox::Projects->new( { schema => $self->bcs_schema }); @@ -449,22 +578,74 @@ sub get_phenotype_matrix { push @info, \@line; foreach my $p (@unique_obsunit_list) { - my @line = @{$obsunit_data{$p}->{metadata}}; + my @metadata = @{$obsunit_data{$p}->{metadata}}; + my $notes = $obsunit_data{$p}->{'notes'}; + + if ($self->repetitive_measurements() eq "all_values_multiple_line") { ##this block is only for when repetitive_measurement option is "all_values_multiple_line" !!! + # check how many values for each trait are recorded !!! 
+ my $max_measurements = 0; + foreach my $trait (@sorted_traits) { + my $trait_values = $obsunit_data{$p}->{$trait}; + if (ref($trait_values) eq 'ARRAY') { + my $count = scalar(@$trait_values); + $max_measurements = $count if $count > $max_measurements; + } else { + $max_measurements = 1 if $max_measurements < 1; + } + } - foreach my $trait (@sorted_traits) { - push @line, $obsunit_data{$p}->{$trait}; - } - push @line, $obsunit_data{$p}->{'notes'}; + ## store the values in separate row + for (my $multi_line = 0; $multi_line < $max_measurements; $multi_line++) { + my @line = @metadata; + + foreach my $trait (@sorted_traits) { + my $trait_values = $obsunit_data{$p}->{$trait}; + + if (ref($trait_values) eq 'ARRAY') { + # Get the ith value if it exists, else undef + my $value = $trait_values->[$multi_line]; + push @line, $value; + } else { + # Single value + push @line, $multi_line == 0 ? $trait_values : undef; + } + } + + push @line, $multi_line == 0 ? $notes : undef; + + # Add treatment values only once + if ($multi_line == 0) { + my %unit_treatments = $treatment_details->{$p} ? %{$treatment_details->{$p}} : (); + foreach my $name (@$treatment_names) { + push @line, $unit_treatments{$name}; + } + } else { + # Fill with undef or empty strings + foreach my $name (@$treatment_names) { + push @line, undef; + } + } + + push @info, \@line; + } + }else{#this block is for all other repetitive options including - first, last, average, sum, and all values_in_single_line !! 
+ my @line = @{$obsunit_data{$p}->{metadata}}; - # add treatment values to each obsunit line - my %unit_treatments; - if ($treatment_details->{$p}) { - %unit_treatments = %{$treatment_details->{$p}}; - }; - foreach my $name (@$treatment_names) { - push @line, $unit_treatments{$name}; + foreach my $trait (@sorted_traits) { + push @line, $obsunit_data{$p}->{$trait}; + } + push @line, $obsunit_data{$p}->{'notes'}; + + # add treatment values to each obsunit line + my %unit_treatments; + if ($treatment_details->{$p}) { + %unit_treatments = %{$treatment_details->{$p}}; + }; + foreach my $name (@$treatment_names) { + push @line, $unit_treatments{$name}; + } + push @info, \@line; } - push @info, \@line; } } @@ -473,4 +654,252 @@ sub get_phenotype_matrix { return @info; } +# Build a trait_name => value hash for the observations of one observation unit, after squashing repeated measurements with detect_multiple_measurements. +# When include_timestamp is set, values are written as "value,timestamp" (falling back to "value,collect_date"); array values are joined with "|". +# Observations whose phenotype_id is listed in dataset_excluded_outliers get an empty string. Returns the hash as a list (empty list when there are no observations). +sub format_observations { + my $self = shift; + my $observations = shift; + + if (scalar(@$observations) == 0) { + print STDERR "No observations in this obs_unit... Skipping.\n"; + return (); # callers assign the result to a hash, so return an empty list rather than an array ref + } + + my %trait_observations; + my $include_timestamp = $self->include_timestamp; + my $dataset_excluded_outliers_ref = $self->dataset_excluded_outliers; + + my $de_duplicated_observations = $self->detect_multiple_measurements($observations); + #print STDERR "DE-DUPLICATED OBSERVATIONS = ".Dumper($de_duplicated_observations); + foreach my $observation (@$de_duplicated_observations){ + #print STDERR "OBSERVATION = ".Dumper($observation); + my $collect_date = $observation->{collect_date}; + #print STDERR "OBSERVATION = ". 
Dumper($observation); + my $timestamp = $observation->{timestamp}; + if (defined($timestamp)) { $timestamp =~ s/^\s+|\s+$//g; } + if (defined($collect_date)) { $collect_date =~ s/^\s+|\s+$//g; } + + if ($include_timestamp && $timestamp) { + + if (ref($observation->{value}) eq 'ARRAY') { + #print STDERR "processing OBSERVATION with timestamp: "; #.Dumper($observation); + $observation->{value} = join("|", map { $_.",".$timestamp } @{$observation->{value}}); # iterate the collected values, not the observation hash itself + $trait_observations{$observation->{trait_name}} = $observation->{value}; + } + else { + $trait_observations{$observation->{trait_name}} = "$observation->{value},$timestamp"; + } + } + elsif ($include_timestamp && $collect_date) { + if (ref($observation->{value}) eq 'ARRAY') { + #print STDERR "processing OBSERVATION with collect_date: "; #Dumper($observation); + $observation->{value} = join("|", map { $_.",".$collect_date } @{$observation->{value}}); # iterate the collected values, not the observation hash itself + $trait_observations{$observation->{trait_name}} = $observation->{value}; + } + else { + $trait_observations{$observation->{trait_name}} = "$observation->{value},$collect_date"; + } + } + else { + if (ref($observation->{value}) eq 'ARRAY') { + #print STDERR "Processing observation alone\n"; + $observation->{value} = join("|", @{$observation->{value}}); + $trait_observations{$observation->{trait_name}} = $observation->{value}; + } + else { + #print STDERR "Single value processing ($observation->{value})!\n"; + $trait_observations{$observation->{trait_name}} = $observation->{value}; + } + } + + ### FOR debugging only: + #$trait_observations{$observation->{trait_name}}.=$observation->{squash_method}; + + # dataset outliers will be empty fields if are in @$dataset_excluded_outliers_ref list of pheno_id outliers + if(grep {$_ == $observation->{'phenotype_id'}} @$dataset_excluded_outliers_ref) { + $trait_observations{$observation->{trait_name}} = ''; # empty field for outlier NA + } + } + + #print STDERR "detecting multiple observations in ".Dumper($observations); + return 
%trait_observations; +} + +sub detect_multiple_measurements { + my $self = shift; + my $trait_observations = shift; + + my %duplicate_measurements; + + # Group the observations by trait_id; groups with more than one measurement are squashed by process_duplicate_measurements. + + if (! $trait_observations) { return []; } + foreach my $o (@$trait_observations) { + my $trait_id = $o->{trait_id}; + push @{$duplicate_measurements{$trait_id}}, $o; + } + + foreach my $trait_id (keys %duplicate_measurements) { + if (scalar(@{$duplicate_measurements{$trait_id}})>1) { + #print STDERR "De-duplicating measurements... ".Dumper($duplicate_measurements{$trait_id}); + + my $processed = $self->process_duplicate_measurements($duplicate_measurements{$trait_id}); + $duplicate_measurements{$trait_id} = ref($processed) eq 'ARRAY' ? $processed : [ $processed ]; # "all_values_multiple_line" hands back the array ref itself; do not nest it + + #print STDERR "After de-duplication: ".Dumper($duplicate_measurements{$trait_id}); + } + } + + #print STDERR "DUPLICATE MEASUREMENTS: ".Dumper(\%duplicate_measurements); + + my @processed_observations; + foreach my $trait_id (keys %duplicate_measurements) { + push @processed_observations, @{$duplicate_measurements{$trait_id}}; # one hash ref per squashed trait, or every measurement when nothing was squashed + } + + #print STDERR "PROCESSED observations = ".Dumper(\@processed_observations); + + return \@processed_observations; +} + +sub process_duplicate_measurements { + my $self = shift; + my $trait_observations = shift; + + #print STDERR "PROCESSING DUPLICATES WITH ".Dumper($trait_observations); + + if ($self->repetitive_measurements() eq "first") { + print STDERR "Retrieving first value...\n"; + $trait_observations = $trait_observations->[0]; + $trait_observations->{squash_method} = "first"; + } + + if ($self->repetitive_measurements() eq "last") { + print STDERR "Retrieving last value...\n"; + $trait_observations = $trait_observations->[-1] ; + $trait_observations->{squash_method} = "last"; + } + + if ($self->repetitive_measurements() eq "average") { + print STDERR "Averaging values ...\n"; + $trait_observations = $self->average_observations($trait_observations); + 
$trait_observations->{squash_method} = "average"; + } + + if ($self->repetitive_measurements() eq "sum") { + print STDERR "Summing values ...\n"; + $trait_observations = $self->sum_observations($trait_observations); + $trait_observations->{squash_method} = "sum"; + } + + if ($self->repetitive_measurements() eq "all_values_single_line") { + print STDERR "Retrieving all values...\n"; + my $collated_multiple_observation = $trait_observations->[0]; + my @trait_values; + foreach my $o (@$trait_observations) { + push @trait_values, $o->{value}; + } + + $collated_multiple_observation->{value} = \@trait_values; + $collated_multiple_observation->{squash_method} = $self->repetitive_measurements(); + $trait_observations = $collated_multiple_observation; + } + + if ($self->repetitive_measurements() eq "all_values_multiple_line") { + foreach my $value (@$trait_observations) { + $value->{squash_method} = $self->repetitive_measurements(); + # print STDERR "the all values in multiple line is : " . Dumper($value) . "\n"; + } + } + + #print STDERR "DONE WITH DUPLICATES, NOW: ".Dumper($trait_observations); + return $trait_observations; +} + +sub average_observations { + my $self = shift; + my $observations_ref = shift || []; + + if (! @$observations_ref) { return; } + + #print STDERR "Averaging Observations: ".Dumper($observations_ref); + + my $sum = undef; + my $count = 0; + my @values; + foreach my $v (@$observations_ref) { + if (! 
$v->{outlier} && defined($v->{value}) ) { + $sum += $v->{value}; + $count++; + push @values, $v->{value}; + } + } + + my $avg; + my $stddev; + + if (defined($sum) && ($count > 0) ) { # make sure to return undef for measurements that are all undef + $avg = $sum / $count; + + my $sqr_diff = 0; + + foreach my $value (@values) { + my $diff = $value - $avg; + $sqr_diff += $diff * $diff; + # @values holds exactly the non-outlier, defined values already counted in $count, so $count is not incremented again + } + $stddev = sqrt($sqr_diff/$count); + } + + my $averaged_observation = $observations_ref->[0]; + $averaged_observation->{value} = $avg; + $averaged_observation->{stddev} = $stddev; + $averaged_observation->{averaged_from} = join(", ", @values); + #print STDERR "Averaged Observation: ".Dumper( $averaged_observation ); + + return $averaged_observation; + +} + +sub sum_observations { + my $self = shift; + my $observations_ref = shift || []; + + if (! @$observations_ref) { return; } + + #print STDERR "add all the obs of this trait: ".Dumper($observations_ref); + + my $sum = 0; + my @values; + foreach my $v (@$observations_ref) { + if (! 
$v->{outlier} && defined($v->{value}) ) { + $sum += $v->{value}; + push @values, $v->{value}; + } + } + + my $summed_observation = $observations_ref->[0]; + $summed_observation->{value} = $sum; + $summed_observation->{summed_from} = join(", ", @values); + #print STDERR "add all the obs for this trait: ".Dumper( $summed_observation ); + + return $summed_observation; +} + +sub retrieve_trait_repeat_types { + my $self = shift; + + my %property_by_cvterm_id; + my $sql = "SELECT cvtermprop.value, cvterm.cvterm_id, cvterm.name FROM cvterm join cvtermprop on(cvterm.cvterm_id=cvtermprop.cvterm_id) join cvterm as proptype on(cvtermprop.type_id=proptype.cvterm_id) where proptype.name='trait_repeat_type' "; + my $sth= $self->bcs_schema()->storage()->dbh()->prepare($sql); + $sth->execute(); + while (my ($property_value, $cvterm_id, $cvterm_name) = $sth->fetchrow_array) { + $property_by_cvterm_id{$cvterm_id} = $property_value; + } + + return \%property_by_cvterm_id; +} + + 1; diff --git a/lib/CXGN/Phenotypes/Search/MaterializedViewTable.pm b/lib/CXGN/Phenotypes/Search/MaterializedViewTable.pm index 58f80dd90d..18e0cb65f0 100644 --- a/lib/CXGN/Phenotypes/Search/MaterializedViewTable.pm +++ b/lib/CXGN/Phenotypes/Search/MaterializedViewTable.pm @@ -174,6 +174,7 @@ has 'order_by' => ( is => 'rw' ); + sub search { my $self = shift; my $schema = $self->bcs_schema(); @@ -365,13 +366,13 @@ sub search { my $calendar_funcs = CXGN::Calendar->new({}); my %unique_traits; - while (my ($observationunit_stock_id, $observationunit_uniquename, $observationunit_type_name, $germplasm_uniquename, $germplasm_stock_id, $rep, $block, $plot_number, $row_number, $col_number, $plant_number, $is_a_control, $notes, $trial_id, $trial_name, $trial_description, $plot_width, $plot_length, $field_size, $field_trial_is_planned_to_be_genotyped, $field_trial_is_planned_to_cross, $breeding_program_id, $breeding_program_name, $breeding_program_description, $year, $design, $location_id, $planting_date, $harvest_date, 
$folder_id, $folder_name, $folder_description, $seedlot_transaction, $seedlot_stock_id, $seedlot_uniquename, $seedlot_current_weight_gram, $seedlot_current_count, $seedlot_box_name, $available_germplasm_seedlots, $treatments, $observations, $full_count) = $h->fetchrow_array()) { + while (my ($observationunit_stock_id, $observationunit_uniquename, $observationunit_type_name, $germplasm_uniquename, $germplasm_stock_id, $rep, $block, $plot_number, $row_number, $col_number, $plant_number, $is_a_control, $notes, $trial_id, $trial_name, $trial_description, $plot_width, $plot_length, $field_size, $field_trial_is_planned_to_be_genotyped, $field_trial_is_planned_to_cross, $breeding_program_id, $breeding_program_name, $breeding_program_description, $year, $design, $location_id, $planting_date, $harvest_date, $folder_id, $folder_name, $folder_description, $seedlot_transaction, $seedlot_stock_id, $seedlot_uniquename, $seedlot_current_weight_gram, $seedlot_current_count, $seedlot_box_name, $available_germplasm_seedlots, $treatments_json, $observations_json, $full_count) = $h->fetchrow_array()) { my $harvest_date_value = $calendar_funcs->display_start_date($harvest_date); my $planting_date_value = $calendar_funcs->display_start_date($planting_date); my $synonyms = $synonym_hash_lookup{$germplasm_uniquename}; my $location_name = $location_id ? $location_id_lookup{$location_id} : ''; - my $observations = JSON::XS->new->decode($observations); - my $treatments = JSON::XS->new->decode($treatments); + my $observations = JSON::XS->new->decode($observations_json); + my $treatments = JSON::XS->new->decode($treatments_json); my $available_germplasm_seedlots = JSON::XS->new->decode($available_germplasm_seedlots); my $seedlot_transaction = $seedlot_transaction ? 
JSON::XS->new->decode($seedlot_transaction) : {}; @@ -381,66 +382,75 @@ sub search { } my @return_observations; + my @observations_per_trait; foreach my $pheno_id (sort keys %ordered_observations){ - my $o = $ordered_observations{$pheno_id}; - my $trait_name = $o->{trait_name}; - if ($filter_trait_names){ - my $skip; - foreach (@{$self->trait_contains}){ - if (index($trait_name, $_) == -1) { - $skip = 1; - } - } - if ($skip){ - next; - } - } - if ($filter_trait_ids){ - if (!$trait_list_check{$o->{trait_id}}){ - next; - } - } - - if ($filter_observation_ids){ - my $skip; - foreach (@{$self->observation_id_list}){ - if (index($o->{phenotype_id}, $_) == -1) { - $skip = 1; - } - } - if ($skip){ - next; - } - } - - my $phenotype_uniquename = $o->{uniquename}; - $unique_traits{$trait_name}++; - if ($include_timestamp){ - my $timestamp_value; - my $operator_value; - if ($phenotype_uniquename){ - my ($p1, $p2) = split /date: /, $phenotype_uniquename; - if ($p2){ - my ($timestamp, $operator_value) = split / operator = /, $p2; - # this regex won't work for timestamps saved in ISO 8601 format - if ( $timestamp =~ m/(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})(\S)(\d{4})/) { - $timestamp_value = $timestamp; - } - } - } - $o->{timestamp} = $timestamp_value; - } - if (!$o->{operator}){ - if ($phenotype_uniquename){ - my ($p1, $p2) = split /date: /, $phenotype_uniquename; - if ($p2){ - my ($timestamp, $operator_value) = split / operator = /, $p2; - $o->{operator} = $operator_value; - } - } - } - push @return_observations, $o; - } + + my $o = $ordered_observations{$pheno_id}; + + ###print STDERR "O: ".Dumper($o); + + my $trait_name = $o->{trait_name}; + if ($filter_trait_names){ + my $skip; + foreach (@{$self->trait_contains}){ + if (index($trait_name, $_) == -1) { + $skip = 1; + } + } + if ($skip){ + next; + } + } + if ($filter_trait_ids){ + if (!$trait_list_check{$o->{trait_id}}){ + next; + } + } + + if ($filter_observation_ids){ + my $skip; + foreach 
(@{$self->observation_id_list}){ + if (index($o->{phenotype_id}, $_) == -1) { + $skip = 1; + } + } + if ($skip){ + next; + } + } + + + + my $phenotype_uniquename = $o->{uniquename}; + $unique_traits{$trait_name}++; + if ($include_timestamp){ + + my $timestamp_value; + my $operator_value; + if ($phenotype_uniquename){ + my ($p1, $p2) = split /date: /, $phenotype_uniquename; + if ($p2){ + my ($timestamp, $operator_value) = split / operator = /, $p2; + # this regex won't work for timestamps saved in ISO 8601 format + if ( $timestamp =~ m/(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})(\S)(\d{4})/) { + $timestamp_value = $timestamp; + } + } + } + $o->{timestamp} = $timestamp_value; + + } + if (!$o->{operator}){ + if ($phenotype_uniquename){ + my ($p1, $p2) = split /date: /, $phenotype_uniquename; + if ($p2){ + my ($timestamp, $operator_value) = split / operator = /, $p2; + $o->{operator} = $operator_value; + } + } + } + push @return_observations, $o; + } no warnings 'uninitialized'; diff --git a/lib/CXGN/Phenotypes/Search/Native.pm b/lib/CXGN/Phenotypes/Search/Native.pm index b8958ffb99..f3ddb46271 100644 --- a/lib/CXGN/Phenotypes/Search/Native.pm +++ b/lib/CXGN/Phenotypes/Search/Native.pm @@ -48,6 +48,7 @@ use Try::Tiny; use Data::Dumper; use SGN::Model::Cvterm; use CXGN::Stock::StockLookup; +use CXGN::Trial; use CXGN::Trial::TrialLayout; use CXGN::Calendar; @@ -225,6 +226,8 @@ sub search { my $using_layout_hash; #For performance reasons the number of joins to stock can be reduced if a trial is given. If trial(s) given, use the cached layout from TrialLayout instead. + print STDERR "start date here: ".$self->start_date()." 
and the end date here: ".$self->end_date()."\n"; + if ($self->trial_list && scalar(@{$self->trial_list})>0) { $using_layout_hash = 1; @@ -398,7 +401,7 @@ sub search { my $datelessq = ""; if ($self->include_dateless_items()) { - $datelessq = " phenotype.create_date IS NULL OR "; + $datelessq = " phenotype.collect_date IS NULL OR "; } my ($start_date, $end_date); @@ -406,13 +409,16 @@ sub search { $start_date = $1; } - if ($self->end_date() =~ m/\d{4}\-\d{2}\-\d{2}/ ) { + if ($self->end_date() =~ m/(\d{4}\-\d{2}\-\d{2})/) { $end_date = $1; } - if ($start_date && $end_date) { - push @where_clause, " ( $datelessq ( phenotype.create_date > $start_date and phenotype.create_date < $end_date ) ) "; + #print STDERR "the start date here: $start_date. And the end date here: $end_date\n"; + if ($start_date && $end_date) { + #print STDERR "including the date query\n"; + push @where_clause, " ( $datelessq ( phenotype.collect_date >= '$start_date'::date and phenotype.collect_date <= '$end_date'::date ) ) "; + #push @where_clause, " ( $datelessq ( phenotype.collect_date >= $start_date and phenotype.collect_date <= $end_date ) ) "; } diff --git a/lib/CXGN/Phenotypes/StorePhenotypes.pm b/lib/CXGN/Phenotypes/StorePhenotypes.pm index 85e55192ac..7e6a9f5c71 100644 --- a/lib/CXGN/Phenotypes/StorePhenotypes.pm +++ b/lib/CXGN/Phenotypes/StorePhenotypes.pm @@ -57,6 +57,7 @@ use CXGN::UploadFile; use CXGN::List::Transform; use CXGN::Stock; use CXGN::Tools::Run; +use CXGN::Phenotype; has 'bcs_schema' => ( isa => 'Bio::Chado::Schema', @@ -169,7 +170,7 @@ has 'ignore_new_values' => ( has 'metadata_hash' => ( isa => "HashRef", is => 'rw', - required => 1 + required => 1, ); has 'image_zipfile_path' => ( @@ -181,21 +182,25 @@ has 'image_zipfile_path' => ( has 'trait_objs' => ( isa => "HashRef", is => 'rw', + default => sub { {} }, ); has 'unique_value_trait_stock' => ( isa => "HashRef", is => 'rw', + default => sub { {} }, ); has 'unique_trait_stock' => ( isa => "HashRef", is => 'rw', + default 
=> sub { {} }, ); has 'unique_trait_stock_timestamp' => ( isa => "HashRef", is => 'rw', + default => sub { {} }, ); has 'composable_validation_check_name' => ( @@ -210,16 +215,63 @@ has 'allow_repeat_measures' => ( default => 0 ); +has 'check_file_stock_trait_duplicates' => ( + isa => "HashRef", + is => 'rw', + default => sub { {} }, +); + +has 'same_value_count' => ( + isa => 'Int', + is => 'rw', +); + +has 'check_trait_category' => ( + isa => 'HashRef', + is => 'rw', + default => sub { {} }, +); + +has 'check_trait_format' => ( + isa => 'HashRef', + is => 'rw', + default => sub { {} }, +); + +has 'check_trait_min_value' => ( + isa => 'HashRef', + is => 'rw', + default => sub { {} }, +); + +has 'check_trait_max_value' => ( + isa => 'HashRef', + is => 'rw', + default => sub { {} }, +); + +has 'check_trait_repeat_type' => ( + isa => 'HashRef', + is => 'rw', + default => sub { {} }, +); + +has 'image_plot_full_names' => ( + isa => 'HashRef', + is => 'rw', + default => sub { {} }, +); + #build is used for creating hash lookups in this case sub create_hash_lookups { my $self = shift; my $schema = $self->bcs_schema; #Find trait cvterm objects and put them in a hash - my %trait_objs; + #$self->trait_objs({}); #initialize with empty list my @trait_list = @{$self->trait_list}; @trait_list = map { $_ eq 'notes' ? 
() : ($_) } @trait_list; # omit notes from trait validation - print STDERR "trait list after filtering @trait_list\n"; + # print STDERR "trait list after filtering @trait_list\n"; my @stock_list = @{$self->stock_list}; my @cvterm_ids; @@ -230,56 +282,75 @@ sub create_hash_lookups { foreach my $trait_name (@trait_list) { print STDERR "trait: $trait_name\n"; my $trait_cvterm = SGN::Model::Cvterm->get_cvterm_row_from_trait_name($schema, $trait_name); - $trait_objs{$trait_name} = $trait_cvterm; - push @cvterm_ids, $trait_cvterm->cvterm_id(); + $self->trait_objs->{$trait_name} = $trait_cvterm; + + my $trait_cvterm_id = $trait_cvterm->cvterm_id(); + + my $trait_category_props = $self->get_trait_props($trait_cvterm_id, 'trait_categories'); + $self->check_trait_category($trait_category_props); + + my $trait_format_props = $self->get_trait_props($trait_cvterm_id, 'trait_format'); + $self->check_trait_format($trait_format_props); + + my $trait_min_value_props = $self->get_trait_props($trait_cvterm_id, 'trait_minimum'); + # print STDERR "Trait min value props: ".Dumper($trait_min_value_props); + + $self->check_trait_min_value($trait_min_value_props); + # print STDERR "Trait min value hash: ".Dumper($self->check_trait_min_value); + + my $trait_max_value_props = $self->get_trait_props($trait_cvterm_id, 'trait_maximum'); + # print STDERR "Trait max value props: ".Dumper($trait_max_value_props); + + $self->check_trait_max_value($trait_max_value_props); + # print STDERR "Trait max value hash: ".Dumper($self->check_trait_max_value); + + my $trait_repeat_type_props = $self->get_trait_props($trait_cvterm_id, 'trait_repeat_type'); + $self->check_trait_repeat_type($trait_repeat_type_props); + + push @cvterm_ids, $trait_cvterm_id; } - $self->trait_objs(\%trait_objs); - - #for checking if values in the file are already stored in the database or in the same file - my %check_unique_trait_stock; - my %check_unique_trait_stock_timestamp; - my %check_unique_value_trait_stock; + # checking if 
values in the file are already stored in the database or in the same file + # my $stock_ids_sql = join ("," , @{$self->stock_id_list}); #print STDERR "Cvterm ids are @cvterm_ids"; + if (scalar @cvterm_ids > 0) { my $cvterm_ids_sql = join ("," , @cvterm_ids); my $previous_phenotype_q = "SELECT phenotype.value, phenotype.cvalue_id, phenotype.collect_date, stock.stock_id FROM phenotype LEFT JOIN nd_experiment_phenotype USING(phenotype_id) LEFT JOIN nd_experiment USING(nd_experiment_id) LEFT JOIN nd_experiment_stock USING(nd_experiment_id) LEFT JOIN stock USING(stock_id) WHERE stock.stock_id IN ($stock_ids_sql) AND phenotype.cvalue_id IN ($cvterm_ids_sql);"; my $h = $schema->storage->dbh()->prepare($previous_phenotype_q); $h->execute(); - #my $previous_phenotype_rs = $schema->resultset('Phenotype::Phenotype')->search({'me.cvalue_id'=>{-in=>\@cvterm_ids}, 'stock.stock_id'=>{-in=>$self->stock_id_list}}, {'join'=>{'nd_experiment_phenotypes'=>{'nd_experiment'=>{'nd_experiment_stocks'=>'stock'}}}, 'select' => ['me.value', 'me.cvalue_id', 'stock.stock_id'], 'as' => ['value', 'cvterm_id', 'stock_id']}); while (my ($previous_value, $cvterm_id, $collect_timestamp, $stock_id) = $h->fetchrow_array()) { - #while (my $previous_phenotype_cvterm = $previous_phenotype_rs->next() ) { - #my $cvterm_id = $previous_phenotype_cvterm->get_column('cvterm_id'); - #my $stock_id = $previous_phenotype_cvterm->get_column('stock_id'); + if ($stock_id){ #my $previous_value = $previous_phenotype_cvterm->get_column('value') || ' '; $collect_timestamp = $collect_timestamp || 'NA'; - $check_unique_trait_stock{$cvterm_id, $stock_id} = $previous_value; - $check_unique_trait_stock_timestamp{$cvterm_id, $stock_id, $collect_timestamp} = $previous_value; - $check_unique_value_trait_stock{$previous_value, $cvterm_id, $stock_id} = 1; + $self->unique_trait_stock->{$cvterm_id, $stock_id} = $previous_value; + $self->unique_trait_stock_timestamp->{$cvterm_id, $stock_id, $collect_timestamp} = $previous_value; + 
$self->unique_value_trait_stock->{$previous_value, $cvterm_id, $stock_id} = 1; } } - } - $self->unique_value_trait_stock(\%check_unique_value_trait_stock); - $self->unique_trait_stock(\%check_unique_trait_stock); - $self->unique_trait_stock_timestamp(\%check_unique_trait_stock_timestamp); + # $self->check_trait_category($self->get_trait_props('trait_categories')); + # $self->check_trait_format($self->get_trait_props('trait_format')); + # $self->check_trait_min_value($self->get_trait_props('trait_minimum')); + # $self->check_trait_max_value($self->get_trait_props('trait_maximum')); + # $self->check_trait_repeat_type($self->get_trait_props('trait_repeat_type')); } sub verify { my $self = shift; - print STDERR "CXGN::Phenotypes::StorePhenotypes verify\n"; + # print STDERR "CXGN::Phenotypes::StorePhenotypes verify\n"; my @plot_list = @{$self->stock_list}; my @trait_list = @{$self->trait_list}; @trait_list = map { $_ eq 'notes' ? () : ($_) } @trait_list; # omit notes from trait validation - print STDERR Dumper \@trait_list; - my %plot_trait_value = %{$self->values_hash}; - my %phenotype_metadata = %{$self->metadata_hash}; - my $timestamp_included = $self->has_timestamps; + # print STDERR Dumper \@trait_list; + # my %plot_trait_value = %{$self->values_hash}; + # my %phenotype_metadata = %{$self->metadata_hash}; + # my $timestamp_included = $self->has_timestamps; my $archived_image_zipfile_with_path = $self->image_zipfile_path; my $schema = $self->bcs_schema; my $transaction_error; @@ -297,9 +368,9 @@ sub verify { my $warning_message = ''; if (scalar(@plots_missing) > 0 || scalar(@traits_missing) > 0) { - print STDERR "Plots or traits not valid\n"; - print STDERR "Invalid plots: ".join(", ", map { "'$_'" } @plots_missing)."\n" if (@plots_missing); - print STDERR "Invalid traits: ".join(", ", map { "'$_'" } @traits_missing)."\n" if (@traits_missing); + # print STDERR "Plots or traits not valid\n"; + # print STDERR "Invalid plots: ".join(", ", map { "'$_'" } 
@plots_missing)."\n" if (@plots_missing); + # print STDERR "Invalid traits: ".join(", ", map { "'$_'" } @traits_missing)."\n" if (@traits_missing); $error_message = "Invalid plots:
    ".join(",
    ", map { "'$_'" } @plots_missing) if (@plots_missing); $error_message = "Invalid traits:
    ".join(",
    ", map { "'$_'" } @traits_missing) if (@traits_missing); @@ -315,28 +386,20 @@ sub verify { } $self->create_hash_lookups(); - my %trait_objs = %{$self->trait_objs}; - my %check_unique_value_trait_stock = %{$self->unique_value_trait_stock}; - my %check_unique_trait_stock = %{$self->unique_trait_stock}; - my %check_unique_trait_stock_timestamp = %{$self->unique_trait_stock_timestamp}; - - my %check_trait_category; - my $sql = "SELECT b.value, c.cvterm_id from cvtermprop as b join cvterm as a on (b.type_id = a.cvterm_id) join cvterm as c on (b.cvterm_id=c.cvterm_id) where a.name = 'trait_categories';"; - my $sth = $schema->storage->dbh->prepare($sql); - $sth->execute(); - while (my ($category_value, $cvterm_id) = $sth->fetchrow_array) { - $check_trait_category{$cvterm_id} = $category_value; - } - my %check_trait_format; - $sql = "SELECT b.value, c.cvterm_id from cvtermprop as b join cvterm as a on (b.type_id = a.cvterm_id) join cvterm as c on (b.cvterm_id=c.cvterm_id) where a.name = 'trait_format';"; - $sth = $schema->storage->dbh->prepare($sql); - $sth->execute(); - while (my ($format_value, $cvterm_id) = $sth->fetchrow_array) { - $check_trait_format{$cvterm_id} = $format_value; - } + ### note: moved these variables below to accessors + #my %trait_objs = %{$self->trait_objs}; + #my %check_unique_value_trait_stock = %{$self->unique_value_trait_stock}; + #my %check_unique_trait_stock = %{$self->unique_trait_stock}; + #my %check_unique_trait_stock_timestamp = %{$self->unique_trait_stock_timestamp}; + + # my %check_trait_category = $self->get_trait_props('trait_categories'); + # my %check_trait_format = $self->get_trait_props('trait_format'); + # my %check_trait_min_value = $self->get_trait_props('trait_minimum'); + # my %check_trait_max_value = $self->get_trait_props('trait_maximum'); + # my %check_trait_repeat_type = $self->get_trait_props('trait_repeat_type'); + #my %image_plot_full_names; - my %image_plot_full_names; #This is for saving Fieldbook images, which 
are only associated to a stock. To save images that are associated to a stock and a trait and a value, use the ExcelAssociatedImages parser if ($archived_image_zipfile_with_path) { @@ -348,7 +411,7 @@ sub verify { my $file_names_stripped = $archived_zipfile_return[0]; my $file_names_full = $archived_zipfile_return[1]; foreach (@$file_names_full) { - $image_plot_full_names{$_} = 1; + $self->image_plot_full_names->{$_} = 1; } my %plot_name_check; foreach (@plot_list) { @@ -363,156 +426,275 @@ sub verify { } } - my %check_file_stock_trait_duplicates; - - my $same_value_count = 0; - foreach my $plot_name (@plot_list) { - foreach my $trait_name (@trait_list) { - my $value_array = $plot_trait_value{$plot_name}->{$trait_name}; - #print STDERR Dumper $value_array; - my $trait_value = $value_array->[0]; - my $timestamp = $value_array->[1]; - #print STDERR "$plot_name, $trait_name, $trait_value\n"; - if ( defined($trait_value) ) { - my $trait_cvterm = $trait_objs{$trait_name}; - my $trait_cvterm_id = $trait_cvterm->cvterm_id(); - my $stock_id = $schema->resultset('Stock::Stock')->find({'uniquename' => $plot_name})->stock_id(); - - #Trait values can be non alphanumeric - #if ($trait_value eq '.' || ($trait_value =~ m/[^a-zA-Z0-9,.\-\/\_]/ && $trait_value ne '.')){ - # $error_message = $error_message."Trait values must be alphanumeric with no spaces:
    Plot Name: ".$plot_name."
    Trait Name: ".$trait_name."
    Value: ".$trait_value."

    "; - #} - - #check that trait value is valid for trait name - if (exists($check_trait_format{$trait_cvterm_id})) { - if ($check_trait_format{$trait_cvterm_id} eq 'numeric') { - my $trait_format_checked = looks_like_number($trait_value); - if (!$trait_format_checked && $trait_value ne '') { - $error_message = $error_message."This trait value should be numeric:
    Plot Name: ".$plot_name."
    Trait Name: ".$trait_name."
    Value: ".$trait_value."

    "; - } - } - if ($check_trait_format{$trait_cvterm_id} eq 'image') { - $trait_value =~ s/^.*photos\///; - if (!exists($image_plot_full_names{$trait_value})) { - $error_message = $error_message."For Plot Name: $plot_name there should be a corresponding image named in the zipfile called $trait_value.
    "; - } - } - } - - if (exists($check_trait_category{$trait_cvterm_id})) { - my @check_values; - - my @trait_categories = split /\//, $check_trait_category{$trait_cvterm_id}; - my %trait_categories_hash; - - if ($check_trait_format{$trait_cvterm_id} eq "Multicat") { - @check_values = split /\:/, $trait_value; - } - else { - @check_values = ( $trait_value ); - } - - if ($check_trait_format{$trait_cvterm_id} eq 'Ordinal' || $check_trait_format{$trait_cvterm_id} eq 'Nominal' || $check_trait_format{$trait_cvterm_id} eq 'Multicat') { - # Ordinal looks like = - - foreach my $ordinal_category (@trait_categories) { - my @split_value = split('=', $ordinal_category); - if (scalar(@split_value) >= 1) { - $trait_categories_hash{$split_value[0]} = 1; - } - } - } else { - # Catch everything else - %trait_categories_hash = map { $_ => 1 } @trait_categories; - } - - foreach my $tw (@check_values) { - if ($tw ne '' && !exists($trait_categories_hash{$tw})) { - my $valid_values = join("/", sort keys %trait_categories_hash); # Sort values for consistent order - $error_message = "This trait value should be one of $valid_values:
    Plot Name: $plot_name
    Trait Name: $trait_name
    Value: $trait_value

    "; - print STDERR $error_message; - } else { - print STDERR "Trait value is valid $tw.\n"; - } - } - } + # PERFORMS CHECKS in the following way: + # + # IMPORTANT: for multiple and time_series trait_repeat_types, the acquisition datetime + # must be present! + # + # * check that values are of the correct format (numeric vs string vs date etc) + # * if categorical, check if legal categories + # * if numerical, check boundaries (trait_minimum, trait_maximum) + # * if trait_repeat_type = single, check if measurement has already been taken, and + # emit warning depending on selected mode (overwrite vs. not) + # * if trait_repeat_type = multiple, check if measurement already exists, otherwise add + # * if trait_repeat_type = time_series, check if measurement for time point already exists, + # otherwise add + # + + my ($errors, $warnings); + my ($all_errors, $all_warnings); + + # print STDERR "values hash = ".Dumper($self->values_hash()); + + # foreach my $plot_name (@plot_list) { + # foreach my $trait_name (@trait_list) { + foreach my $plot_name (keys %{$self->values_hash()}) { + foreach my $trait_name (keys %{$self->values_hash()->{$plot_name}}) { + my $measurements_array = $self->values_hash()->{$plot_name}->{$trait_name}; + + if ( (ref($measurements_array) eq "ARRAY") && ref($measurements_array->[0]) eq 'ARRAY') { ### we have a list of measurements, not just a trait_value timestamp pair + # print STDERR "Trait name = $trait_name\n"; + foreach my $value_array (@$measurements_array) { + # print STDERR "Value array = ".Dumper($value_array)."\n"; + ($warnings, $errors) = $self->check_measurement($plot_name, $trait_name, $value_array); + $all_errors .= $errors; + $all_warnings .= $warnings; + } + }else { + ($warnings, $errors) = $self->check_measurement($plot_name, $trait_name, $measurements_array); + $all_errors .= $errors; + $all_warnings .= $warnings; + } + } + } + return ($all_warnings, $all_errors); +} - #print STDERR "$trait_value, $trait_cvterm_id, 
$stock_id\n"; - #check if the plot_name, trait_name combination already exists in database. - if (exists($check_unique_value_trait_stock{$trait_value, $trait_cvterm_id, $stock_id})) { - my $prev = $check_unique_value_trait_stock{$trait_value, $trait_cvterm_id, $stock_id}; - if ( defined($prev) && length($prev) && defined($trait_value) && length($trait_value) ) { - $same_value_count++; - } - } elsif (exists($check_unique_trait_stock_timestamp{$trait_cvterm_id, $stock_id, $timestamp})) { - my $prev = $check_unique_trait_stock_timestamp{$trait_cvterm_id, $stock_id, $timestamp}; - if ( defined($prev) ) { - $warning_message = $warning_message."$plot_name already has a different value ($prev) than in your file (" . ($trait_value ? $trait_value : "blank") . ") stored in the database for the trait $trait_name for the timestamp $timestamp.
    "; - } - } elsif (exists($check_unique_trait_stock{$trait_cvterm_id, $stock_id})) { - my $prev = $check_unique_trait_stock{$trait_cvterm_id, $stock_id}; - if ( defined($prev) ) { - $warning_message = $warning_message."$plot_name already has a different value ($prev) than in your file (" . ($trait_value ? $trait_value : "blank") . ") stored in the database for the trait $trait_name.
    "; - } +sub check_measurement { + my $self = shift; + my $plot_name = shift; + my $trait_name = shift; + my $value_array = shift; + + my $error_message = ""; + my $warning_message = ""; + + print STDERR "check_measurement for trait $trait_name and values ".Dumper($value_array)."\n"; + + #print STDERR Dumper $value_array; + my ($trait_value, $timestamp); + if (ref($value_array) eq 'ARRAY') { + # the entry represents trait + timestamp + # + $trait_value = $value_array->[0]; + $timestamp = $value_array->[1]; + } + elsif (ref($value_array) eq "HASH") { + # the trait is a high dimensional trait - we can't check + print STDERR "TRAIT VALUE IS HIGH DIMENSIONAL - skipping.\n"; + return (undef, undef); + } + else { + # it's a scalar + # + $trait_value = $value_array; + } + #print STDERR "$plot_name, $trait_name, $trait_value\n"; + if ( defined($trait_value) && $trait_name ne "notes" ) { + print STDERR "TRAIT NAME = ".Dumper( $trait_name)."\n"; + my $trait_cvterm = $self->trait_objs->{$trait_name}; + my $trait_cvterm_id = $trait_cvterm->cvterm_id(); + # print STDERR "the trait cvterm id of this trait is: " . $trait_cvterm_id . "\n"; + my $stock_id = $self->bcs_schema->resultset('Stock::Stock')->find({'uniquename' => $plot_name})->stock_id(); + + #Trait values can be non alphanumeric + #if ($trait_value eq '.' || ($trait_value =~ m/[^a-zA-Z0-9,.\-\/\_]/ && $trait_value ne '.')){ + # $error_message = $error_message."Trait values must be alphanumeric with no spaces:
    Plot Name: ".$plot_name."
    Trait Name: ".$trait_name."
    Value: ".$trait_value."

    "; + #} + + #check that trait value is valid for trait name + if (exists($self->check_trait_format->{$trait_cvterm_id})) { + # print STDERR "Trait minimum value checks if it exists: " . $self->check_trait_min_value->{$trait_cvterm_id} . "\n"; + if ($self->check_trait_format->{$trait_cvterm_id} eq 'numeric') { + my $trait_format_checked = looks_like_number($trait_value); + if (!$trait_format_checked && $trait_value ne '') { + $error_message = $error_message."This trait value should be numeric:
    Plot Name: ".$plot_name."
    Trait Name: ".$trait_name."
    Value: ".$trait_value."

    "; + } + my $trait_min = defined $self->check_trait_min_value->{$trait_cvterm_id} ? $self->check_trait_min_value->{$trait_cvterm_id} : undef; + my $trait_max = defined $self->check_trait_max_value->{$trait_cvterm_id} ? $self->check_trait_max_value->{$trait_cvterm_id} : undef; + + print STDERR "the trait minimum: Trait Minimum for trait $trait_name: ", (defined $trait_min ? $trait_min : undef), "\n"; + print STDERR "the trait maximum: Trait Maximum for trait $trait_name: ", (defined $trait_max ? $trait_max : undef), "\n"; + + if (defined $trait_min && $trait_value < $trait_min) { + $error_message .= "For trait '$trait_name' the trait value $trait_value should not be smaller than the defined trait_minimum, $trait_min.
    "; + } else { + print STDERR "the trait min and trait value : No minimum value defined for trait '$trait_name' (cvterm_id: $trait_cvterm_id).\n"; } - #check if the plot_name, trait_name combination already exists in same file. - if (exists($check_file_stock_trait_duplicates{$trait_cvterm_id, $stock_id})) { - $warning_message = $warning_message."$plot_name already has a value for the trait $trait_name in your file. Possible duplicate in your file?
    "; + if (defined $trait_max && $trait_value > $trait_max) { + $error_message .= "For the trait '$trait_name' the trait value $trait_value should not be larger than the defined trait_maximum, $trait_max.
    "; + }else { + print STDERR "the trait max and trait value: No maximum value defined for trait '$trait_name' (cvterm_id: $trait_cvterm_id). \n"; } - $check_file_stock_trait_duplicates{$trait_cvterm_id, $stock_id} = 1; - } - if ($timestamp_included) { - if ( (!$timestamp && !$trait_value) || ($timestamp && !$trait_value) || ($timestamp && $trait_value) ) { - if ($timestamp) { - if( !$timestamp =~ m/(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})(\S)(\d{4})/) { - $error_message = $error_message."Bad timestamp for value for Plot Name: ".$plot_name."
    Trait Name: ".$trait_name."
    Should be YYYY-MM-DD HH:MM:SS-0000 or YYYY-MM-DD HH:MM:SS+0000

    "; - } - } - } + # if ($trait_value < $self->check_trait_min_value->{$trait_cvterm_id}) { + # # print STDERR "Trait value: $trait_value, Trait minimum value: ".$self->check_trait_min_value->{$trait_cvterm_id}."\n"; + # $error_message .= "For trait '$trait_name' the trait value $trait_value should not be smaller than the defined trait_minimum, ". $self->check_trait_min_value->{$trait_cvterm_id}. ""; + # } + # if ($trait_value > $self->check_trait_max_value->{$trait_cvterm_id}) { + # # print STDERR "Trait value: $trait_value, Trait min value: ".$self->check_trait_min_value->{$trait_cvterm_id}."\n"; + # $error_message .= "For the trait '$trait_name' The trait value $trait_value should not be larger than the defined trait_maximum, ".$self->check_trait_max_value->{$trait_cvterm_id}.""; + # } + } + + #check, if the trait value is an image + if ($self->check_trait_format->{$trait_cvterm_id} eq 'image') { + $trait_value =~ s/^.*photos\///; + if (!exists($self->image_plot_full_names->{$trait_value})) { + $error_message = $error_message."For Plot Name: $plot_name there should be a corresponding image named in the zipfile called $trait_value.
    "; + } + } + } + + if (exists($self->check_trait_category->{$trait_cvterm_id})) { + my @trait_categories = sort(split /\//, $self->check_trait_category->{$trait_cvterm_id}); + my %trait_categories_hash; + # print STDERR "Trait categories: ".Dumper(\@trait_categories)."\n"; + # print STDERR "Trait categories hash: ".Dumper(\%trait_categories_hash)."\n"; + my @check_values; + # print STDERR "Check values: ".Dumper(\@check_values)."\n"; + if ($self->check_trait_format->{$trait_cvterm_id} eq 'Multicat') { + @check_values = split /\:/, $trait_value; + }else { + @check_values = ( $trait_value ); } - + if ($self->check_trait_format->{$trait_cvterm_id} eq 'Ordinal' || $self->check_trait_format->{$trait_cvterm_id} eq 'Nominal' || $self->check_trait_format->{$trait_cvterm_id} eq 'Multicat') { + # Ordinal looks like = + foreach my $ordinal_category (@trait_categories) { + my @split_value = split('=', $ordinal_category); + if (scalar(@split_value) >= 1) { + $trait_categories_hash{$split_value[0]} = 1; + } + } + } else { + # Catch everything else + %trait_categories_hash = map { $_ => 1 } @trait_categories; + } + + foreach my $value (@check_values) { + if ($value ne '' && !exists($trait_categories_hash{$value})) { + my $valid_values = join("/", sort keys %trait_categories_hash); # Sort values for consistent order + $error_message = " This trait value should be one of $valid_values:
    Plot Name: $plot_name
    Trait Name: $trait_name
    Value: $trait_value

    "; + print STDERR "The error in the value $error_message \n"; + }else { + print STDERR "Trait value $trait_value is valid\n"; + } + } } + + my $repeat_type = "single"; + if (exists($self->check_trait_repeat_type->{$trait_cvterm_id})) { + if (grep /$repeat_type/, ("single", "multiple", "time_series")) { + $repeat_type = $self->check_trait_repeat_type->{$trait_cvterm_id}; + # print STDERR "Trait repeat type: $repeat_type\n"; + }else { + print STDERR "the trait repeat type of $self->check_trait_repeat_type->{$trait_cvterm_id} has no meaning. Assuming 'single'.\n"; + } + } + + if ($repeat_type eq "multiple" or $repeat_type eq "time_series") { + # print STDERR "Trait repeat type: $repeat_type\n"; + if (!$timestamp) { + # print STDERR "trait name : $trait_name is multiple without timestamp \n"; + $error_message .= "For trait $trait_name that is defined as a 'multiple' or 'time_series' repeat type trait, a timestamp is required.\n"; + } + if (exists($self->unique_trait_stock_timestamp->{$trait_cvterm_id, $stock_id, $timestamp})) { + # print STDERR "trait name : $trait_name with timestamp \n"; + $error_message .= "For the multiple measurement trait $trait_name the observation unit $plot_name already has a value associated with it at exactly the same time"; + } + } + + #print STDERR "$trait_value, $trait_cvterm_id, $stock_id\n"; + #check if the plot_name, trait_name combination already exists in database. 
+ if ($repeat_type eq "single") { + if (exists($self->unique_value_trait_stock->{$trait_value, $trait_cvterm_id, $stock_id})) { + my $prev = $self->unique_value_trait_stock->{$trait_value, $trait_cvterm_id, $stock_id}; + if ( defined($prev) && length($prev) && defined($trait_value) && length($trait_value) ) { + $self->same_value_count($self->same_value_count() + 1); + } + } elsif (exists($self->unique_trait_stock_timestamp->{$trait_cvterm_id, $stock_id, $timestamp})) { + my $prev = $self->unique_trait_stock_timestamp->{$trait_cvterm_id, $stock_id, $timestamp}; + if ( defined($prev) ) { + $warning_message = $warning_message."$plot_name already has a different value ($prev) than in your file (" . ($trait_value ? $trait_value : "blank") . ") stored in the database for the trait $trait_name for the timestamp $timestamp.
    "; + } + } elsif (exists($self->unique_trait_stock->{$trait_cvterm_id, $stock_id})) { + my $prev = $self->unique_trait_stock->{$trait_cvterm_id, $stock_id}; + if ( defined($prev) ) { + $warning_message = $warning_message."$plot_name already has a different value ($prev) than in your file (" . ($trait_value ? $trait_value : "blank") . ") stored in the database for the trait $trait_name.
    "; + } + } + + #check if the plot_name, trait_name combination already exists in same file. + if (exists($self->check_file_stock_trait_duplicates->{$trait_cvterm_id, $stock_id})) { + $warning_message = $warning_message."$plot_name already has a value for the trait $trait_name in your file. Possible duplicate in your file?
    "; + } + $self->check_file_stock_trait_duplicates()->{$trait_cvterm_id, $stock_id} = 1; + + }else { ## multiple or time_series - warn only if the timestamp/value are identical + if (exists($self->unique_trait_stock_timestamp->{$trait_cvterm_id, $stock_id, $timestamp}) && $self->unique_trait_stock_timestamp->{$trait_cvterm_id, $stock_id, $timestamp} eq $trait_value) { + $warning_message .= "For trait 'trait_name', the timepoint $timestamp for stock $stock_id already has a measurement with the same value $trait_value associated with it.
    "; + } + } } + if ($self->has_timestamps()) { + if ( (!$timestamp && !$trait_value) || ($timestamp && !$trait_value) || ($timestamp && $trait_value) ) { + if ($timestamp) { + if( !$timestamp =~ m/(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})(\S)(\d{4})/) { + $error_message = $error_message."Bad timestamp for value for Plot Name: ".$plot_name."
    Trait Name: ".$trait_name."
    Should be YYYY-MM-DD HH:MM:SS-0000 or YYYY-MM-DD HH:MM:SS+0000

    "; + } + } + } + } # combine all warnings about the same values into a summary count - if ( $same_value_count > 0 ) { - $warning_message = $warning_message."There are $same_value_count values in your file that are the same as values already stored in the database."; + if ( defined($self->same_value_count()) && ($self->same_value_count > 0) ) { + $warning_message = $warning_message."There are ".$self->same_value_count()." values in your file that are the same as values already stored in the database."; } ## Verify metadata - if ($phenotype_metadata{'archived_file'} && (!$phenotype_metadata{'archived_file_type'} || $phenotype_metadata{'archived_file_type'} eq "")) { + if ($self->metadata_hash->{'archived_file'} && (!$self->metadata_hash->{'archived_file_type'} || $self->metadata_hash->{'archived_file_type'} eq "")) { $error_message = "No file type provided for archived file."; return ($warning_message, $error_message); } - if (!$phenotype_metadata{'operator'} || $phenotype_metadata{'operator'} eq "") { - $error_message = "No operaror provided in file upload metadata."; + if (!$self->metadata_hash->{'operator'} || $self->metadata_hash->{'operator'} eq "") { + $warning_message = "No operator provided in file upload metadata."; return ($warning_message, $error_message); } - if (!$phenotype_metadata{'date'} || $phenotype_metadata{'date'} eq "") { + if (!$self->metadata_hash->{'date'} || $self->metadata_hash->{'date'} eq "") { $error_message = "No date provided in file upload metadata."; return ($warning_message, $error_message); } + # print STDERR "warnings : $warning_message, Errors: $error_message\n"; return ($warning_message, $error_message); } sub store { my $self = shift; - print STDERR "CXGN::Phenotypes::StorePhenotypes store\n"; - + # print STDERR "CXGN::Phenotypes::StorePhenotypes store\n"; + $self->create_hash_lookups(); my %linked_data = %{$self->get_linked_data()}; my @plot_list = @{$self->stock_list}; - my @trait_list = @{$self->trait_list}; + my 
@trait_list = @{$self->trait_list}; @trait_list = map { $_ eq 'notes' ? () : ($_) } @trait_list; # omit notes so they can be handled separately - my %trait_objs = %{$self->trait_objs}; - my %plot_trait_value = %{$self->values_hash}; - my %phenotype_metadata = %{$self->metadata_hash}; - my $timestamp_included = $self->has_timestamps; + # my %trait_objs = %{$self->trait_objs}; + # my %plot_trait_value = %{$self->values_hash}; + # my %phenotype_metadata = %{$self->metadata_hash}; + # my $timestamp_included = $self->has_timestamps; my $archived_image_zipfile_with_path = $self->image_zipfile_path; - my $phenotype_metadata = $self->metadata_hash; + # my $phenotype_metadata = $self->metadata_hash; my $schema = $self->bcs_schema; my $metadata_schema = $self->metadata_schema; my $phenome_schema = $self->phenome_schema; @@ -523,12 +705,12 @@ sub store { my $error_message; my $transaction_error; my $user_id = $self->user_id; - my $archived_file = $phenotype_metadata->{'archived_file'}; - my $archived_file_type = $phenotype_metadata->{'archived_file_type'}; - my $operator = $phenotype_metadata->{'operator'}; - my $upload_date = $phenotype_metadata->{'date'}; + my $archived_file = $self->metadata_hash->{'archived_file'}; + my $archived_file_type = $self->metadata_hash->{'archived_file_type'}; + my $operator = $self->metadata_hash->{'operator'}; + my $upload_date = $self->metadata_hash->{'date'}; my $success_message; - + my $phenotyping_experiment_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'phenotyping_experiment', 'experiment_type')->cvterm_id(); my $local_date_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'date', 'local')->cvterm_id(); my $local_operator_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'operator', 'local')->cvterm_id(); @@ -543,17 +725,16 @@ sub store { my @stored_details; my %nd_experiment_md_images; - my %check_unique_trait_stock = %{$self->unique_trait_stock}; - + # my %check_unique_trait_stock = %{$self->unique_trait_stock}; 
my $rs; my %data; $rs = $schema->resultset('Stock::Stock')->search( {'type.name' => ['field_layout', 'analysis_experiment', 'sampling_layout'], 'me.type_id' => [$plot_cvterm_id, $plant_cvterm_id, $subplot_cvterm_id, $tissue_sample_cvterm_id, $analysis_instance_cvterm_id], 'me.stock_id' => {-in=>$self->stock_id_list } }, {join=> {'nd_experiment_stocks' => {'nd_experiment' => ['type', 'nd_experiment_projects' ] } } , - '+select'=> ['me.stock_id', 'me.uniquename', 'nd_experiment.nd_geolocation_id', 'nd_experiment_projects.project_id'], - '+as'=> ['stock_id', 'uniquename', 'nd_geolocation_id', 'project_id'] + '+select'=> ['me.stock_id', 'me.uniquename', 'nd_experiment.nd_geolocation_id', 'nd_experiment_projects.project_id'], + '+as'=> ['stock_id', 'uniquename', 'nd_geolocation_id', 'project_id'] } - ); + ); while (my $s = $rs->next()) { $data{$s->get_column('uniquename')} = [$s->get_column('stock_id'), $s->get_column('nd_geolocation_id'), $s->get_column('project_id') ]; } @@ -567,257 +748,278 @@ sub store { my $skip_count = 0; my $overwrite_count = 0; my $remove_count = 0; - foreach my $plot_name (@plot_list) { + # print STDERR "(store) values hash ".Dumper($self->values_hash()); + # foreach my $plot_name (@plot_list) { + foreach my $plot_name (keys %{$self->values_hash()}) { + my $stock_id = $data{$plot_name}[0]; my $location_id = $data{$plot_name}[1]; my $project_id = $data{$plot_name}[2]; - + # create plot-wide nd_experiment entry - + my $experiment = $schema->resultset('NaturalDiversity::NdExperiment')->create({ nd_geolocation_id => $location_id, type_id => $phenotyping_experiment_cvterm_id, nd_experimentprops => [{type_id => $local_date_cvterm_id, value => $upload_date}, {type_id => $local_operator_cvterm_id, value => $operator}], nd_experiment_projects => [{project_id => $project_id}], nd_experiment_stocks => [{stock_id => $stock_id, type_id => $phenotyping_experiment_cvterm_id}] - }); - + }); + $experiment_ids{$experiment->nd_experiment_id()}=1; - + # Check if 
there is a note for this plot, If so add it using dedicated function - my $note_array = $plot_trait_value{$plot_name}->{'notes'}; + my $note_array = $self->values_hash->{$plot_name}->{'notes'}; + print STDERR "check note array is defined: " . Dumper($note_array) . "\n"; if (defined $note_array) { $self->store_stock_note($stock_id, $note_array, $operator); } - + # Check if there is nirs data for this plot - my $nirs_hashref = $plot_trait_value{$plot_name}->{'nirs'}; + my $nirs_hashref = $self->values_hash->{$plot_name}->{'nirs'}; if (defined $nirs_hashref) { $self->store_high_dimensional_data($nirs_hashref, $experiment->nd_experiment_id(), 'nirs_spectra'); $new_count++; } - + # Check if there is transcriptomics data for this plot - my $transcriptomics_hashref = $plot_trait_value{$plot_name}->{'transcriptomics'}; + my $transcriptomics_hashref = $self->values_hash->{$plot_name}->{'transcriptomics'}; if (defined $transcriptomics_hashref) { $self->store_high_dimensional_data($transcriptomics_hashref, $experiment->nd_experiment_id(), 'transcriptomics'); $new_count++; } - + # Check if there is metabolomics data for this plot - my $metabolomics_hashref = $plot_trait_value{$plot_name}->{'metabolomics'}; + my $metabolomics_hashref = $self->values_hash->{$plot_name}->{'metabolomics'}; if (defined $metabolomics_hashref) { $self->store_high_dimensional_data($metabolomics_hashref, $experiment->nd_experiment_id(), 'metabolomics'); $new_count++; } - - foreach my $trait_name (@trait_list) { - - #print STDERR "trait: $trait_name\n"; - my $trait_cvterm = $trait_objs{$trait_name}; - - my $value_array = $plot_trait_value{$plot_name}->{$trait_name}; - - my @values; - - # convert to array or array format for single array values to accept old format inputs without refactoring - if (ref($value_array->[0]) ne 'ARRAY') { - push @values, $value_array; - } else { - @values = @{$value_array}; - } - - foreach my $value (@values) { - - # perl doesn't have a problem attempting to access possibly 
non existing indices - my $trait_value = $value->[0]; - my $timestamp = $value->[1]; - $operator = $value->[2] ? $value->[2] : $operator; - my $observation = $value->[3]; - my $image_id = $value->[4]; - my $additional_info = $value->[5] || undef; - my $external_references = $value->[6] || undef; - my $unique_time = $timestamp && defined($timestamp) ? $timestamp : 'NA' . $upload_date; - my $existing_trait_value = $check_unique_trait_stock{$trait_cvterm->cvterm_id(), $stock_id}; - - if (defined($trait_value) && (length($trait_value) || $remove_values)) { - - if ($ignore_new_values) { - if (exists($check_unique_trait_stock{$trait_cvterm->cvterm_id(), $stock_id})) { - $skip_count++; - next; - } - } + + # foreach my $trait_name (@trait_list) { + foreach my $trait_name (keys %{$self->values_hash()->{$plot_name}}) { + my $measurements_array = $self->values_hash()->{$plot_name}->{$trait_name}; + print STDERR "TRAIT: $trait_name\n"; + + if ($trait_name eq "notes") { + # we already dealt with notes, which are stored as stockprops... + print STDERR "skipping notes trait (already stored as stockprop)...\n"; + next; + } + my $trait_cvterm = $self->trait_objs->{$trait_name}; + + my $measurements_array = $self->values_hash->{$plot_name}->{$trait_name}; + # print STDERR "measurement array : ".Dumper($measurements_array); + # print STDERR "reference measurement array = ".ref($measurements_array->[0])."\n"; + if ( (ref($measurements_array) eq "ARRAY") && ref($measurements_array->[0]) ne "ARRAY") { + ## multiple measurements, have structure [ [ value, timestamp ], [ value, timestamp ]... 
] instead of just [ value, timestamp ] for single measurements + # print STDERR "Adding to sub array...\n"; + $measurements_array = [ $measurements_array ]; + } + + # print STDERR "MEASUREMENT ARRAY ".Dumper($measurements_array); + + my $value_count = 0; + if (ref($measurements_array) eq "ARRAY") { + foreach my $value_array(@$measurements_array) { + # print STDERR "CHECKING $plot_name, $trait_name, ".Dumper($value_array)."\n"; + + # this should not give any $errors now + + + + my ($warnings, $errors) = $self->check_measurement($plot_name, $trait_name, $value_array); + + if ($errors) { die "Trying to store phenotypes with the following errors: $errors"; } + + # convert to array or array format for single array values to accept old format inputs without refactoring + #if (ref($value_array->[0]) ne 'ARRAY') { + # push @values, $value_array; + # } else { + # @values = @{$value_array}; + # } + + # print STDERR "VALUE ARRAY: ".Dumper($value_array); + + #foreach my $value (@$value_array) { + + my $phenotype_object = CXGN::Phenotype->new( { schema => $schema }); + # print STDERR "complete phenotype_object: ".Dumper($phenotype_object)."\n"; + # perl doesn't have a problem attempting to access possibly non existing indices + my $trait_value = $value_array->[0]; + # print STDERR "the trait value in the phenotype object: $trait_value\n"; + $phenotype_object->value($trait_value); + my $timestamp = $value_array->[1]; + # print STDERR "the timestamp in the phenotype object: $timestamp\n"; + + if ($timestamp eq "") { $timestamp = undef; } + $phenotype_object->collect_date($timestamp); + # print STDERR "the collect date in the phenotype object: $timestamp\n"; + $operator = $value_array->[2] ? $value_array->[2] : $operator; + $phenotype_object->operator($operator); + my $observation = $value_array->[3]; + # print STDERR "the value array: " . Dumper ($value_array) . "\n"; + # print STDERR "the observation in the phenotype object: " . Dumper($observation) . 
"\n"; + if ($observation eq "") { $observation = undef; } # special case, not sure where it comes from + $phenotype_object->phenotype_id($observation); + my $image_id = $value_array->[4]; + + if (defined($image_id) && ($image_id eq "")) { $image_id = undef; } + + $phenotype_object->image_id($image_id); + my $additional_info = $value_array->[5] || undef; + my $external_references = $value_array->[6] || undef; + + my $unique_time = $timestamp && defined($timestamp) ? $timestamp : $upload_date; + # print STDERR "the unique time in the phenotype object: $unique_time\n"; + $phenotype_object->unique_time($unique_time); + my $existing_trait_value = $self->unique_trait_stock->{$trait_cvterm->cvterm_id(), $stock_id}; + $phenotype_object->existing_trait_value($existing_trait_value); + + $phenotype_object->cvterm_name($trait_cvterm->name()); + $phenotype_object->cvterm_id($trait_cvterm->cvterm_id()); + $phenotype_object->experiment($experiment); + # print STDERR "Existing value $existing_trait_value. New value: ".$phenotype_object->value()."\n"; + + if (defined($trait_value) && (length($trait_value) || $remove_values)) { + if ($ignore_new_values) { + # print STDERR "ignoring new vlaues ...\n"; + if (exists($self->unique_trait_stock->{$trait_cvterm->cvterm_id(), $stock_id})) { + $skip_count++; + next; + } + } + } my $plot_trait_uniquename = "stock: " . - $stock_id . ", trait: " . - $trait_cvterm->name . - ", date: $unique_time" . - ", operator: $operator"; - - # Remove previous phenotype values for a given stock and trait if $overwrite values is checked, otherwise skip to next - if ($overwrite_values) { - if (exists($check_unique_trait_stock{$trait_cvterm->cvterm_id(), $stock_id})) { - - #skip when observation is provided since overwriting doesn't create records it updates observations. 
- if (!$observation) { - push @{$trait_and_stock_to_overwrite{traits}}, $trait_cvterm->cvterm_id(); - push @{$trait_and_stock_to_overwrite{stocks}}, $stock_id; - } - $plot_trait_uniquename .= ", overwritten: $upload_date"; - if ( defined($trait_value) && length($trait_value) ) { - $overwrite_count++; - } - elsif ( $existing_trait_value ne "" ) { - $remove_count++; - } - } elsif ( length($trait_value) ) { - $new_count++; - } - $check_unique_trait_stock{$trait_cvterm->cvterm_id(), $stock_id} = 1; - } else { - if (!$allow_repeat_measures && exists($check_unique_trait_stock{$trait_cvterm->cvterm_id(), $stock_id})) { - $skip_count++; - next; - } else { - $new_count++; - } - } - - my $phenotype; - if ($observation) { - $phenotype = $trait_cvterm->find_related("phenotype_cvalues", { - observable_id => $trait_cvterm->cvterm_id, - phenotype_id => $observation, - }); - - ## should check that unit and variable (also checked here) are conserved in parse step, if not reject before store - ## should also update operator in nd_experimentprops - - $phenotype->update({ - value => $trait_value, - uniquename => $plot_trait_uniquename, - }); - - $self->handle_timestamp($timestamp, $observation); - $self->handle_operator($operator, $observation); - - my $q = "SELECT phenotype_id, nd_experiment_id, file_id - FROM phenotype - JOIN nd_experiment_phenotype using(phenotype_id) - JOIN nd_experiment_stock using(nd_experiment_id) - LEFT JOIN phenome.nd_experiment_md_files using(nd_experiment_id) - JOIN stock using(stock_id) - WHERE stock.stock_id=? 
- AND phenotype.cvalue_id=?"; - - my $h = $self->bcs_schema->storage->dbh()->prepare($q); - $h->execute($stock_id, $trait_cvterm->cvterm_id); - while (my ($phenotype_id, $nd_experiment_id, $file_id) = $h->fetchrow_array()) { - push @overwritten_values, [ $file_id, $phenotype_id, $nd_experiment_id ]; - $experiment_ids{$nd_experiment_id} = 1; - if ($image_id) { - $nd_experiment_md_images{$nd_experiment_id} = $image_id; - } - } - - } - else { - - $phenotype = $trait_cvterm->create_related("phenotype_cvalues", { - observable_id => $trait_cvterm->cvterm_id, - value => $trait_value, - uniquename => $plot_trait_uniquename, - }); - - $self->handle_timestamp($timestamp, $phenotype->phenotype_id); - $self->handle_operator($operator, $phenotype->phenotype_id); - - $experiment->create_related('nd_experiment_phenotypes', { - phenotype_id => $phenotype->phenotype_id - }); - - # $experiment->find_or_create_related({ - # nd_experiment_phenotypes => [{phenotype_id => $phenotype->phenotype_id}] - # }); - - $experiment_ids{$experiment->nd_experiment_id()} = 1; - if ($image_id) { - $nd_experiment_md_images{$experiment->nd_experiment_id()} = $image_id; - } - } - my $additional_info_stored; - if($additional_info){ - my $pheno_additional_info = $schema->resultset("Phenotype::Phenotypeprop")->find_or_create({ - phenotype_id => $phenotype->phenotype_id, - type_id => $phenotype_addtional_info_type_id, - }); - $pheno_additional_info = $pheno_additional_info->update({ - value => encode_json $additional_info, - }); - $additional_info_stored = $pheno_additional_info->value ? 
decode_json $pheno_additional_info->value : undef; - } - my $external_references_stored; - if($external_references){ - my $phenotype_external_references = $schema->resultset("Phenotype::Phenotypeprop")->find_or_create({ - phenotype_id => $phenotype->phenotype_id, - type_id => $external_references_type_id, - }); - $phenotype_external_references = $phenotype_external_references->update({ - value => encode_json $external_references, - }); - $external_references_stored = $phenotype_external_references->value ? decode_json $phenotype_external_references->value : undef; - } - - my $observationVariableDbId = $trait_cvterm->cvterm_id; - my $observation_id = $phenotype->phenotype_id; - my %details = ( - "germplasmDbId"=> qq|$linked_data{$plot_name}->{germplasmDbId}|, - "germplasmName"=> $linked_data{$plot_name}->{germplasmName}, - "observationDbId"=> qq|$observation_id|, - "observationLevel"=> $linked_data{$plot_name}->{observationLevel}, - "observationUnitDbId"=> qq|$linked_data{$plot_name}->{observationUnitDbId}|, - "observationUnitName"=> $linked_data{$plot_name}->{observationUnitName}, - "observationVariableDbId"=> qq|$observationVariableDbId|, - "observationVariableName"=> $trait_cvterm->name, - "studyDbId"=> qq|$project_id|, - "uploadedBy"=> $operator ? $operator : "", - "additionalInfo" => $additional_info_stored, - "externalReferences" => $external_references_stored, - "value" => $trait_value - ); - - if ($timestamp) { $details{'observationTimeStamp'} = $timestamp}; - if ($operator) { $details{'collector'} = $operator}; - - push @stored_details, \%details; + $stock_id . ", trait: " . + $trait_cvterm->name . + ", date: $unique_time" . + ", operator: $operator" . + ", count: $value_count" . 
+ ", observation: $observation"; + + print STDERR "phenotype uniquename: $plot_trait_uniquename\n"; + + $phenotype_object->uniquename($plot_trait_uniquename); + + # Remove previous phenotype values for a given stock and trait if $overwrite values is checked, otherwise skip to next + if ($overwrite_values) { + if (exists($self->unique_trait_stock->{$trait_cvterm->cvterm_id(), $stock_id})) { + #skip when observation is provided since overwriting doesn't create records it updates observations. + if (!$observation) { + push @{$trait_and_stock_to_overwrite{traits}}, $trait_cvterm->cvterm_id(); + push @{$trait_and_stock_to_overwrite{stocks}}, $stock_id; + } + $plot_trait_uniquename .= ", overwritten: $upload_date"; + if ( defined($trait_value) && length($trait_value) ) { + $overwrite_count++; + }elsif ( $existing_trait_value ne "" ) { + $remove_count++; + } + } elsif ( length($trait_value) ) { + $new_count++; + } + $self->unique_trait_stock->{$trait_cvterm->cvterm_id(), $stock_id} = 1; + } else { + if (!$allow_repeat_measures && exists($self->unique_trait_stock->{$trait_cvterm->cvterm_id(), $stock_id})) { + # print STDERR "skipping this value because (NO REPEAT MEASURES!)\n"; + $skip_count++; + next; + } else { + $new_count++; + } + } + + if ( !length($trait_value) && !$remove_values && $existing_trait_value ne "" ) { + $skip_count++; + next; + } + + $phenotype_object->store(); + + $experiment_ids{$experiment->nd_experiment_id()} = 1; + if ($image_id) { + $nd_experiment_md_images{$experiment->nd_experiment_id()} = $image_id; + } + + my $additional_info_stored; + if($additional_info){ + # my $pheno_additional_info = $schema->resultset("Phenotype::Phenotypeprop")->find_or_create({ + # phenotype_id => $phenotype->phenotype_id, + # type_id => $phenotype_addtional_info_type_id, + # }); + # $pheno_additional_info = $pheno_additional_info->update({ + # value => encode_json $additional_info, + # }); + # $additional_info_stored = $pheno_additional_info->value ? 
decode_json $pheno_additional_info->value : undef; + $additional_info_stored = $phenotype_object->store_additional_info($additional_info); + } + my $external_references_stored; + + # print STDERR "external references from phenotype package: ".Dumper($external_references); + + if ($external_references) { + # my $phenotype_external_references = $schema->resultset("Phenotype::Phenotypeprop")->find_or_create({ + # phenotype_id => $phenotype->phenotype_id, + # type_id => $external_references_type_id, + # }); + # $phenotype_external_references = $phenotype_external_references->update({ + # value => encode_json $external_references, + # }); + $external_references_stored = $phenotype_object->store_external_references($external_references); + } + + my $observationVariableDbId = $trait_cvterm->cvterm_id; + my $observation_id = $phenotype_object->phenotype_id; + my %details = ( + "germplasmDbId"=> qq|$linked_data{$plot_name}->{germplasmDbId}|, + "germplasmName"=> $linked_data{$plot_name}->{germplasmName}, + "observationDbId"=> qq|$observation_id|, + "observationLevel"=> $linked_data{$plot_name}->{observationLevel}, + "observationUnitDbId"=> qq|$linked_data{$plot_name}->{observationUnitDbId}|, + "observationUnitName"=> $linked_data{$plot_name}->{observationUnitName}, + "observationVariableDbId"=> qq|$observationVariableDbId|, + "observationVariableName"=> $trait_cvterm->name, + "studyDbId"=> qq|$project_id|, + "uploadedBy"=> $operator ? 
$operator : "", + "additionalInfo" => $additional_info_stored, + "externalReferences" => $external_references_stored, + "value" => $trait_value + ); + + if ($timestamp) { $details{'observationTimeStamp'} = $timestamp}; + if ($operator) { $details{'collector'} = $operator}; + + push @stored_details, \%details; + + $value_count++; } - elsif ( !length($trait_value) && !$remove_values && $existing_trait_value ne "" ) { - $skip_count++; - } - } - } - } - - if (scalar(keys %trait_and_stock_to_overwrite) > 0) { - my @saved_nd_experiment_ids = keys %experiment_ids; - push @overwritten_values, $self->delete_previous_phenotypes(\%trait_and_stock_to_overwrite, \@saved_nd_experiment_ids); - } - - $success_message = 'All values in your file have been successfully processed!

    '; - $success_message .= "$new_count new values stored
    "; - $success_message .= "$skip_count previously stored values skipped
    "; - $success_message .= "$overwrite_count previously stored values overwritten
    "; - $success_message .= "$remove_count previously stored values removed

    "; - my %files_with_overwritten_values = map {$_->[0] => 1} @overwritten_values; - my $obsoleted_files = $self->check_overwritten_files_status(keys %files_with_overwritten_values); - if (scalar (@$obsoleted_files) > 0){ - $success_message .= ' The following previously uploaded files are now obsolete because all values from them were overwritten by your upload: '; - foreach (@$obsoleted_files){ - $success_message .= " ".$_->[1]; - } - } + } + } + } + + # print STDERR "the trait stock\n"; + + if (scalar(keys %trait_and_stock_to_overwrite) > 0) { + my @saved_nd_experiment_ids = keys %experiment_ids; + push @overwritten_values, $self->delete_previous_phenotypes(\%trait_and_stock_to_overwrite, \@saved_nd_experiment_ids); + } + + $success_message = 'All values in your file have been successfully processed!

    '; + $success_message .= "$new_count new values stored
    "; + $success_message .= "$skip_count previously stored values skipped
    "; + $success_message .= "$overwrite_count previously stored values overwritten
    "; + $success_message .= "$remove_count previously stored values removed

    "; + my %files_with_overwritten_values = map {$_->[0] => 1} @overwritten_values; + my $obsoleted_files = $self->check_overwritten_files_status(keys %files_with_overwritten_values); + if (scalar (@$obsoleted_files) > 0){ + $success_message .= ' The following previously uploaded files are now obsolete because all values from them were overwritten by your upload: '; + foreach (@$obsoleted_files){ + $success_message .= " ".$_->[1]; + } + } }; try { @@ -825,20 +1027,21 @@ sub store { } catch { $transaction_error = $_; }; - + if ($transaction_error) { $error_message = $transaction_error; - print STDERR "Transaction error storing phenotypes: $transaction_error\n"; + # print STDERR "Transaction error storing phenotypes: $transaction_error\n"; return ($error_message, $success_message); } - + if ($archived_file) { $self->save_archived_file_metadata($archived_file, $archived_file_type, \%experiment_ids); } + if (scalar(keys %nd_experiment_md_images) > 0) { $self->save_archived_images_metadata(\%nd_experiment_md_images); } - + return ($error_message, $success_message, \@stored_details); } @@ -846,18 +1049,34 @@ sub store_stock_note { my $self = shift; my $stock_id = shift; my $note_array = shift; + # print STDERR "the note array is: " . Dumper($note_array) . "\n"; my $operator = shift; - my $note = $note_array->[0]; - my $timestamp = $note_array->[1]; - $operator = $note_array->[2] ? $note_array->[2] : $operator; - print STDERR "Stock_id is $stock_id and note in sub is $note, timestamp is $timestamp, operator is $operator\n"; + if (ref($note_array->[0]) eq 'ARRAY'){ #this block will execute, if there a multiple notes, this is in the case of repetitive values for the same observationUnitName!! + foreach my $note_entry (@$note_array) { + my ($note, $timestamp, $notes_operator) = @$note_entry; + $notes_operator = defined $notes_operator ? 
$notes_operator : $operator; + # print STDERR "multiple notes value: $note, timestamp: $timestamp, operator: $notes_operator\n"; - $note = $note ." (Operator: $operator, Time: $timestamp)"; - my $stock = $self->bcs_schema()->resultset("Stock::Stock")->find( { stock_id => $stock_id } ); - $stock->create_stockprops( { 'notes' => $note } ); -} + #the note with operator and timestamp + my $full_note = $note ."(Operator: $notes_operator, Time: $timestamp)"; + + my $stock = $self->bcs_schema()->resultset("Stock::Stock")->find({ stock_id => $stock_id }); + $stock->create_stockprops({ 'notes' => $full_note }); + # print STDERR "multiple notes : $full_note\n"; + } + }else{ #this will execute if there is a single notes !! + my ($note, $timestamp, $notes_operator) = @$note_array; + $notes_operator = defined $notes_operator ? $notes_operator : $operator; + # print STDERR "single notes values $note, timestamp: $timestamp, operator: $notes_operator\n"; + $note = $note ." (Operator: $notes_operator, Time: $timestamp)"; + + my $stock = $self->bcs_schema()->resultset("Stock::Stock")->find( { stock_id => $stock_id } ); + $stock->create_stockprops( { 'notes' => $note } ); + # print STDERR "Stored note for a single notes: $note\n"; + } +} sub store_high_dimensional_data { @@ -885,7 +1104,7 @@ sub store_high_dimensional_data { $dbh = $self->bcs_schema->storage->dbh()->prepare($protocol_query); $dbh->execute($nd_experiment_id,$protocol_id); - print STDERR "[StorePhenotypes] Linked $md_json_type json with id $json_id to nd_experiment $nd_experiment_id to protocol $protocol_id\n"; + # print STDERR "[StorePhenotypes] Linked $md_json_type json with id $json_id to nd_experiment $nd_experiment_id to protocol $protocol_id\n"; } sub delete_previous_phenotypes { @@ -947,13 +1166,13 @@ sub check_overwritten_files_status { if ($_){ $h->execute($_); my $count = $h->fetchrow; - print STDERR "COUNT $count \n"; + # print STDERR "COUNT $count \n"; if ($count == 0){ $h2->execute($_); $h3->execute($_); 
my $basename = $h3->fetchrow; push @obsoleted_files, [$_, $basename]; - print STDERR "MADE file_id $_ OBSOLETE\n"; + # print STDERR "MADE file_id $_ OBSOLETE\n"; } } } @@ -1083,6 +1302,26 @@ sub handle_operator { $h->execute($operator, $phenotype_id); } +sub get_trait_props { + my $self = shift; + my $cvterm_id = shift; + my $property_name = shift; + + my %property_by_cvterm_id; + my $sql = "SELECT cvtermprop.value, cvterm.cvterm_id, cvterm.name FROM cvterm join cvtermprop on(cvterm.cvterm_id=cvtermprop.cvterm_id) join cvterm as proptype on(cvtermprop.type_id=proptype.cvterm_id) where proptype.name=? "; + my $sth= $self->bcs_schema()->storage()->dbh()->prepare($sql); + $sth->execute($property_name); + while (my ($property_value, $cvterm_id, $cvterm_name) = $sth->fetchrow_array) { + if (defined $property_value) { + $property_by_cvterm_id{$cvterm_id} = $property_value; + } else { + # print STDERR "Warning: property '$property_name' not found for trait '$cvterm_name' (cvterm_id: '$cvterm_id') is not defined \n"; + } + } + # print STDERR "PROPERTIES FROM $property_name: ".Dumper(\%property_by_cvterm_id); + return \%property_by_cvterm_id; +} + ### 1; ### diff --git a/lib/CXGN/Project.pm b/lib/CXGN/Project.pm index 4bbcd9461e..4e7fe5da26 100644 --- a/lib/CXGN/Project.pm +++ b/lib/CXGN/Project.pm @@ -2764,7 +2764,7 @@ sub obsolete_additional_uploaded_file { -=head2 function get_phenotypes_for_trait($trait_id) +=head2 function get_phenotypes_for_trait($trait_id, $stock_type, $start_date, $end_date) Usage: Desc: returns the measurements for the given trait in this trial as an array of values, e.g. [2.1, 2, 50] @@ -2781,6 +2781,14 @@ sub get_phenotypes_for_trait { my $stock_type = shift; my @data; my $dbh = $self->bcs_schema->storage()->dbh(); + my $start_date = shift; + my $end_date = shift; + my $date_sql = ''; + my @date_placeholders; + if ($start_date && $end_date) { + $date_sql = " AND (collect_date > ? 
and collect_date < ?)"; + @date_placeholders = ($start_date, $end_date); + } #my $schema = $self->bcs_schema(); my $h; @@ -2791,11 +2799,11 @@ sub get_phenotypes_for_trait { $join_string = 'JOIN nd_experiment_stock USING(nd_experiment_id) JOIN stock USING(stock_id)'; $where_string = "stock.type_id=$stock_type_id and"; } - my $q = "SELECT phenotype.value::real FROM cvterm JOIN phenotype ON (cvterm_id=cvalue_id) JOIN nd_experiment_phenotype USING(phenotype_id) JOIN nd_experiment_project USING(nd_experiment_id) $join_string WHERE $where_string project_id=? and cvterm.cvterm_id = ? and phenotype.value~? ORDER BY phenotype_id ASC;"; + my $q = "SELECT phenotype.value::real FROM cvterm JOIN phenotype ON (cvterm_id=cvalue_id) JOIN nd_experiment_phenotype USING(phenotype_id) JOIN nd_experiment_project USING(nd_experiment_id) $join_string WHERE $where_string project_id=? and cvterm.cvterm_id = ? and phenotype.value~? $date_sql ORDER BY phenotype_id ASC;"; $h = $dbh->prepare($q); my $numeric_regex = '^-?[0-9]+([,.][0-9]+)?$'; - $h->execute($self->get_trial_id(), $trait_id, $numeric_regex ); + $h->execute($self->get_trial_id(), $trait_id, $numeric_regex, @date_placeholders ); while (my ($value) = $h->fetchrow_array()) { push @data, $value + 0; } @@ -2932,6 +2940,7 @@ sub get_traits_assayed { my $q; if ($stock_type) { + # print STDERR " the stock type here: = $stock_type\n"; my $stock_type_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($self->bcs_schema(), $stock_type, 'stock_type')->cvterm_id(); $q = "SELECT (((cvterm.name::text || '|'::text) || db.name::text) || ':'::text) || dbxref.accession::text AS trait, cvterm.cvterm_id, imaging_project.project_id, imaging_project.name, count(phenotype.value) FROM cvterm diff --git a/lib/CXGN/Trial/Download.pm b/lib/CXGN/Trial/Download.pm index 2dd065869f..8a27069584 100644 --- a/lib/CXGN/Trial/Download.pm +++ b/lib/CXGN/Trial/Download.pm @@ -1,6 +1,4 @@ -package CXGN::Trial::Download; - =head1 NAME CXGN::Trial::Download @@ -68,7 +66,7 
@@ my $download = CXGN::Trial::Download->new({ phenotype_max_value => $phenotype_max_value, has_header=>$has_header, include_pedigree_parents=>$include_pedigree_parents -}); +}); my $error = $download->download(); my $file_name = "phenotype.$format"; $c->res->content_type('Application/'.$format); @@ -106,9 +104,11 @@ information, mapping to treatment_project_ids and trait_list. These keys can be ignored if you don't need them in the layout. As a XLS: + my $plugin = "TrialLayoutExcel"; As a CSV: + my $plugin = "TrialLayoutCSV"; my $download = CXGN::Trial::Download->new({ @@ -127,7 +127,6 @@ $c->res->content_type('Application/'.$format); $c->res->header('Content-Disposition', qq[attachment; filename="$file_name"]); my $output = read_file($tempfile); $c->res->body($output); - ------------------------------------------------------------------------------ For downloading the IGD sequencing facility spreadsheet (as used from @@ -154,8 +153,11 @@ $c->res->body($output); =head1 AUTHORS +Nick Morales + =cut +package CXGN::Trial::Download; use Moose; use Moose::Util::TypeConstraints; @@ -226,13 +228,15 @@ has 'filename' => (isa => 'Str', is => 'ro', predicate => 'has_filename', required => 1, ); - +has 'repetitive_measurements' => (isa => 'Str', is => 'rw' ); has 'file_metadata' => (isa => 'Str', is => 'rw', predicate => 'has_file_metadata'); has 'trial_stock_type' => (isa => 'Str', is => 'rw', predicate => 'has_trial_stock_type', required => 0); has 'field_crossing_data_order' => (isa => 'ArrayRef[Str]|Undef', is => 'rw', required => 0); has 'prop_id' => (isa => 'Int | Undef', is => 'rw', required => 0); has 'people_schema' => ( isa => 'Ref', is => 'rw'); has 'dbh' => (is => 'rw'); +has 'start_date' => ( isa => 'Str', is => 'rw'); +has 'end_date' => (isa => 'Str', is => 'rw'); sub BUILD { diff --git a/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeCSV.pm b/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeCSV.pm index 405a8079a4..9b9d01b7c1 100644 --- 
a/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeCSV.pm +++ b/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeCSV.pm @@ -94,6 +94,8 @@ sub download { my $include_pedigree_parents = $self->include_pedigree_parents(); my $search_type = $self->search_type(); + my $repetitive_measurements = $self->repetitive_measurements(); + $self->trial_download_log($trial_id, "trial phenotypes"); my @data; @@ -125,7 +127,8 @@ sub download { trait_contains=>$trait_contains, phenotype_min_value=>$phenotype_min_value, phenotype_max_value=>$phenotype_max_value, - include_pedigree_parents=>$include_pedigree_parents + include_pedigree_parents=>$include_pedigree_parents, + repetitive_measurements => $repetitive_measurements, ); @data = $phenotypes_search->get_phenotype_matrix(); } @@ -160,7 +163,11 @@ sub download { my $num_col = scalar(@$header); for (my $line =0; $line< @data; $line++) { my $columns = $data[$line]; - print $F join ',', map { $_ =~ s/"/""/g; qq!"$_"! } @$columns; + print $F join ',', map { + my $field = $_; + $field =~ s/"/""/g; + qq!"$field"!; + } @$columns; print $F "\n"; } close($F); diff --git a/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeCSVEntryNumbers.pm b/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeCSVEntryNumbers.pm index 61e4d5d2f8..1fc6f0abaa 100644 --- a/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeCSVEntryNumbers.pm +++ b/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeCSVEntryNumbers.pm @@ -9,7 +9,7 @@ CXGN::Trial::Download::Plugin::TrialPhenotypeCSVEntryNumbers This plugin module is loaded from CXGN::Trial::Download ------------------------------------------------------------------- +=head1 DESCRIPTION For downloading phenotypes in a matrix where columns contain the phenotypes and rows contain the observation unit (as used from @@ -24,40 +24,46 @@ if you don't need to filter by them. 
THIS PLUGIN IS MODIFIED TO INCLUDE A COLUMN FOR ACCESSION-LEVEL ENTRY NUMBERS As a CSV: -my $plugin = 'TrialPhenotypeCSVEntryNumbers'; + + my $plugin = 'TrialPhenotypeCSVEntryNumbers'; As a xls: -my $plugin = 'TrialPhenotypeExcelEntryNumbers'; - -my $download = CXGN::Trial::Download->new({ - bcs_schema => $schema, - trait_list => \@trait_list_int, - year_list => \@year_list, - location_list => \@location_list_int, - trial_list => \@trial_list_int, - accession_list => \@accession_list_int, - plot_list => \@plot_list_int, - plant_list => \@plant_list_int, - filename => $tempfile, - format => $plugin, - data_level => $data_level, - include_timestamp => $timestamp_option, - trait_contains => \@trait_contains_list, - phenotype_min_value => $phenotype_min_value, - phenotype_max_value => $phenotype_max_value, - has_header=>$has_header, - exclude_phenotype_outlier=>$exclude_phenotype_outlier, - include_pedigree_parents=>$include_pedigree_parents -}); -my $error = $download->download(); -my $file_name = "phenotype.$format"; -$c->res->content_type('Application/'.$format); -$c->res->header('Content-Disposition', qq[attachment; filename="$file_name"]); -my $output = read_file($tempfile); -$c->res->body($output); - - -=head1 AUTHORS + + my $plugin = 'TrialPhenotypeExcelEntryNumbers'; + +Then: + + my $download = CXGN::Trial::Download->new({ + bcs_schema => $schema, + trait_list => \@trait_list_int, + year_list => \@year_list, + location_list => \@location_list_int, + trial_list => \@trial_list_int, + accession_list => \@accession_list_int, + plot_list => \@plot_list_int, + plant_list => \@plant_list_int, + filename => $tempfile, + format => $plugin, + data_level => $data_level, + include_timestamp => $timestamp_option, + trait_contains => \@trait_contains_list, + phenotype_min_value => $phenotype_min_value, + phenotype_max_value => $phenotype_max_value, + has_header=>$has_header, + exclude_phenotype_outlier=>$exclude_phenotype_outlier, + 
include_pedigree_parents=>$include_pedigree_parents + }); + my $error = $download->download(); + my $file_name = "phenotype.$format"; + $c->res->content_type('Application/'.$format); + $c->res->header('Content-Disposition', qq[attachment; filename="$file_name"]); + my $output = read_file($tempfile); + $c->res->body($output); + + +=head1 AUTHOR + +David Waring =cut @@ -102,6 +108,7 @@ sub download { my $exclude_phenotype_outlier = $self->exclude_phenotype_outlier; my $include_pedigree_parents = $self->include_pedigree_parents(); my $search_type = $self->search_type(); + my $repetitive_measurements = $self->repetitive_measurements(); $self->trial_download_log($trial_id, "trial phenotypes"); @@ -134,7 +141,8 @@ sub download { trait_contains=>$trait_contains, phenotype_min_value=>$phenotype_min_value, phenotype_max_value=>$phenotype_max_value, - include_pedigree_parents=>$include_pedigree_parents + include_pedigree_parents=>$include_pedigree_parents, + repetitive_measurements => $repetitive_measurements, ); @data = $phenotypes_search->get_phenotype_matrix(); } @@ -193,7 +201,11 @@ sub download { my $entry_number = $trial_entry_numbers{$trial_id}{$stock_id}; splice(@$columns, $ENTRY_NUMBER_COLUMN, 0, $entry_number); } - print $F join ',', map { $_ =~ s/"/""/g; qq!"$_"! 
} @$columns; + print $F join ',', map { + my $field = $_; + $field =~ s/"/""/g; + qq!"$field"!; + } @$columns; print $F "\n"; } close($F); diff --git a/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeExcel.pm b/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeExcel.pm index 8526a80b18..e139c435d3 100644 --- a/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeExcel.pm +++ b/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeExcel.pm @@ -9,53 +9,54 @@ CXGN::Trial::Download::Plugin::TrialPhenotypeExcel This plugin module is loaded from CXGN::Trial::Download ------------------------------------------------------------------- +=head1 DESCRIPTION -For downloading phenotypes in a matrix where columns contain the phenotypes -and rows contain the observation unit (as used from -SGN::Controller::BreedersToolbox::Download->download_phenotypes_action which -is used from the wizard, trial detail page, and manage trials page for -downlading phenotypes): +For downloading phenotypes in a matrix where columns contain the phenotypes and rows contain the observation unit (as used from SGN::Controller::BreedersToolbox::Download->download_phenotypes_action which is used from the wizard, trial detail page, and manage trials page for downlading phenotypes): -There a number of optional keys for filtering down the phenotypes -(trait_list, year_list, location_list, etc). Keys can be entirely ignored -if you don't need to filter by them. +There are a number of optional keys for filtering down the phenotypes (trait_list, year_list, location_list, etc). Keys can be entirely ignored if you don't need to filter by them. 
As a CSV: -my $plugin = 'TrialPhenotypeCSV'; + + my $plugin = 'TrialPhenotypeCSV'; As a xls: -my $plugin = 'TrialPhenotypeExcel'; - -my $download = CXGN::Trial::Download->new({ - bcs_schema => $schema, - trait_list => \@trait_list_int, - year_list => \@year_list, - location_list => \@location_list_int, - trial_list => \@trial_list_int, - accession_list => \@accession_list_int, - plot_list => \@plot_list_int, - plant_list => \@plant_list_int, - filename => $tempfile, - format => $plugin, - data_level => $data_level, - include_timestamp => $timestamp_option, - exclude_phenotype_outlier => $exclude_phenotype_outlier, - trait_contains => \@trait_contains_list, - phenotype_min_value => $phenotype_min_value, - phenotype_max_value => $phenotype_max_value, - has_header=>$has_header -}); -my $error = $download->download(); -my $file_name = "phenotype.$format"; -$c->res->content_type('Application/'.$format); -$c->res->header('Content-Disposition', qq[attachment; filename="$file_name"]); -my $output = read_file($tempfile); -$c->res->body($output); + + my $plugin = 'TrialPhenotypeExcel'; + +Then: + + my $download = CXGN::Trial::Download->new({ + bcs_schema => $schema, + trait_list => \@trait_list_int, + year_list => \@year_list, + location_list => \@location_list_int, + trial_list => \@trial_list_int, + accession_list => \@accession_list_int, + plot_list => \@plot_list_int, + plant_list => \@plant_list_int, + filename => $tempfile, + format => $plugin, + data_level => $data_level, + include_timestamp => $timestamp_option, + exclude_phenotype_outlier => $exclude_phenotype_outlier, + trait_contains => \@trait_contains_list, + phenotype_min_value => $phenotype_min_value, + phenotype_max_value => $phenotype_max_value, + has_header => $has_header, + repetitive_measurements => 'average', # or 'first', 'last', 'all' + }); + my $error = $download->download(); + my $file_name = "phenotype.$format"; + $c->res->content_type('Application/'.$format); + 
$c->res->header('Content-Disposition', qq[attachment; filename="$file_name"]); + my $output = read_file($tempfile); + $c->res->body($output); =head1 AUTHORS +Nick Morales, Lukas Mueller, Dariusz Bienkowski + =cut use Moose::Role; @@ -93,8 +94,14 @@ sub download { my $phenotype_max_value = $self->phenotype_max_value(); my $exclude_phenotype_outlier = $self->exclude_phenotype_outlier; my $search_type = $self->search_type(); + my $phenotype_start_date = $self->phenotype_start_date(); + my $phenotype_end_date = $self->phenotype_end_date(); + my $repetitive_measurements_type = $self->repetitive_measurements_type(); + + $self->trial_download_log($trial_id, "trial phenotypes"); + my @data; if ($self->data_level() eq 'metadata'){ my $metadata_search = CXGN::Phenotypes::MetaDataMatrix->new( @@ -122,6 +129,9 @@ sub download { trait_contains=>$trait_contains, phenotype_min_value=>$phenotype_min_value, phenotype_max_value=>$phenotype_max_value, + phenotype_start_date => $phenotype_start_date, + phenotype_end_date => $phenotype_end_date, + repetitive_measurements_type => $repetitive_measurements_type, ); @data = $phenotypes_search->get_phenotype_matrix(); } diff --git a/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeExcelEntryNumbers.pm b/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeExcelEntryNumbers.pm index 8a60703be6..f202ade8c6 100644 --- a/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeExcelEntryNumbers.pm +++ b/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeExcelEntryNumbers.pm @@ -11,46 +11,53 @@ CXGN::Trial::Download::Plugin::TrialPhenotypeExcelEntryNumbers This plugin module is loaded from CXGN::Trial::Download ------------------------------------------------------------------- +=head1 DESCRIPTION This plugin extends the base TrialPhenotypeExcel plugin to include an additional column for trial-level accession entry numbers. 
As a CSV: -my $plugin = 'TrialPhenotypeCSVEntryNumbers'; + my $plugin = 'TrialPhenotypeCSVEntryNumbers'; + As a xls: -my $plugin = 'TrialPhenotypeExcelEntryNumbers'; - -my $download = CXGN::Trial::Download->new({ - bcs_schema => $schema, - trait_list => \@trait_list_int, - year_list => \@year_list, - location_list => \@location_list_int, - trial_list => \@trial_list_int, - accession_list => \@accession_list_int, - plot_list => \@plot_list_int, - plant_list => \@plant_list_int, - filename => $tempfile, - format => $plugin, - data_level => $data_level, - include_timestamp => $timestamp_option, - exclude_phenotype_outlier => $exclude_phenotype_outlier, - trait_contains => \@trait_contains_list, - phenotype_min_value => $phenotype_min_value, - phenotype_max_value => $phenotype_max_value, - has_header=>$has_header -}); -my $error = $download->download(); -my $file_name = "phenotype.$format"; -$c->res->content_type('Application/'.$format); -$c->res->header('Content-Disposition', qq[attachment; filename="$file_name"]); -my $output = read_file($tempfile); -$c->res->body($output); + + my $plugin = 'TrialPhenotypeExcelEntryNumbers'; + +Then: + + my $download = CXGN::Trial::Download->new({ + bcs_schema => $schema, + trait_list => \@trait_list_int, + year_list => \@year_list, + location_list => \@location_list_int, + trial_list => \@trial_list_int, + accession_list => \@accession_list_int, + plot_list => \@plot_list_int, + plant_list => \@plant_list_int, + filename => $tempfile, + format => $plugin, + data_level => $data_level, + include_timestamp => $timestamp_option, + exclude_phenotype_outlier => $exclude_phenotype_outlier, + trait_contains => \@trait_contains_list, + phenotype_min_value => $phenotype_min_value, + phenotype_max_value => $phenotype_max_value, + has_header=>$has_header, + repetitive_measurements => 'average', + }); + my $error = $download->download(); + my $file_name = "phenotype.$format"; + $c->res->content_type('Application/'.$format); + 
$c->res->header('Content-Disposition', qq[attachment; filename="$file_name"]); + my $output = read_file($tempfile); + $c->res->body($output); =head1 AUTHORS +David Waring + =cut use Moose::Role; @@ -95,6 +102,7 @@ sub download { my $phenotype_max_value = $self->phenotype_max_value(); my $exclude_phenotype_outlier = $self->exclude_phenotype_outlier; my $search_type = $self->search_type(); + my $repetitive_measurements = $self->repetitive_measurements(); $self->trial_download_log($trial_id, "trial phenotypes"); my @data; @@ -124,6 +132,7 @@ sub download { trait_contains=>$trait_contains, phenotype_min_value=>$phenotype_min_value, phenotype_max_value=>$phenotype_max_value, + repetitive_measurements => $repetitive_measurements, ); @data = $phenotypes_search->get_phenotype_matrix(); } diff --git a/lib/SGN/Controller/AJAX/TrialMetadata.pm b/lib/SGN/Controller/AJAX/TrialMetadata.pm index 6e82f5c572..6925d65b7e 100644 --- a/lib/SGN/Controller/AJAX/TrialMetadata.pm +++ b/lib/SGN/Controller/AJAX/TrialMetadata.pm @@ -303,9 +303,11 @@ sub traits_assayed : Chained('trial') PathPart('traits_assayed') Args(0) { sub trait_phenotypes : Chained('trial') PathPart('trait_phenotypes') Args(0) { my $self = shift; my $c = shift; - my $start_date = shift; - my $end_date = shift; - my $include_dateless_items = shift; + my $start_date = $c->req->param('start_date'); + my $end_date = $c->req->param('end_date'); + my $include_dateless_items = $c->req->param('include_dateless_items'); + + # print STDERR "trait_phenotypes START DATE $start_date; and the END DATE $end_date\n"; #get userinfo from db my $user = $c->user(); @@ -325,10 +327,13 @@ sub trait_phenotypes : Chained('trial') PathPart('trait_phenotypes') Args(0) { data_level => $display, trait_list=> [$trait], trial_list => [$c->stash->{trial_id}], - start_date => $start_date, - end_date => $end_date, - include_dateless_items => $include_dateless_items, + start_date => $start_date, + end_date => $end_date, + include_dateless_items => 
$include_dateless_items, ); + + # print STDERR "get data \n"; + my @data = $phenotypes_search->get_phenotype_matrix(); $c->stash->{rest} = { status => "success", @@ -346,12 +351,16 @@ sub phenotype_summary : Chained('trial') PathPart('phenotypes') Args(0) { my $trial_id = $c->stash->{trial_id}; my $display = $c->req->param('display'); my $trial_stock_type = $c->req->param('trial_stock_type'); + my $start_date = $c->req->param('start_date'); + my $end_date = $c->req->param('end_date'); + my $include_dateless_items = $c->req->param('include_dateless_items'); my $select_clause_additional = ''; my $group_by_additional = ''; my $order_by_additional = ''; my $stock_type_id; my $rel_type_id; my $total_complete_number; + # print STDERR "trial phenotypes: START DATE: $start_date. END DATE: $end_date, INLCUDE DATELESS $include_dateless_items, DIPLAY = $display\n"; if ($display eq 'plots') { $stock_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'plot', 'stock_type')->cvterm_id(); $rel_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'plot_of', 'stock_relationship')->cvterm_id(); @@ -419,7 +428,30 @@ sub phenotype_summary : Chained('trial') PathPart('phenotypes') Args(0) { $trial_stock_type_id = $accesion_type_id; } - my $h = $dbh->prepare("SELECT (((cvterm.name::text || '|'::text) || db.name::text) || ':'::text) || dbxref.accession::text AS trait, + my $date_params = ""; + my @date_placeholders = (); + my $datelessq = ""; + + if ($include_dateless_items) { + $datelessq = " ( collect_date IS NULL) "; + } + if ($start_date && $end_date) { + $start_date =~ s/(.*)[ T]+.*$/$1/g; + $end_date =~ s/(.*)[ T]+.*$/$1/g; + + # print STDERR "START DATE $start_date END DATE: $end_date\n"; + if ($datelessq) { + $date_params = " AND ( $datelessq OR ( collect_date::date >= ? and collect_date::date <= ?)) "; + } + else { + $date_params = " AND ( collect_date::date >= ? and collect_date::date <= ?) 
"; + } + @date_placeholders = ($start_date, $end_date); + } + + # print STDERR "date params : $date_params\n"; + + my $q1 = "SELECT (((cvterm.name::text || '|'::text) || db.name::text) || ':'::text) || dbxref.accession::text AS trait, cvterm.cvterm_id, count(phenotype.value), to_char(avg(phenotype.value::real), 'FM999990.990'), @@ -441,18 +473,26 @@ sub phenotype_summary : Chained('trial') PathPart('phenotypes') Args(0) { AND stock_relationship.type_id=? AND plot.type_id=? AND accession.type_id=? + $date_params GROUP BY (((cvterm.name::text || '|'::text) || db.name::text) || ':'::text) || dbxref.accession::text, cvterm.cvterm_id $group_by_additional ORDER BY cvterm.name ASC - $order_by_additional;"); + $order_by_additional"; + + # print STDERR "run numeric query: $q1\n"; + my $h1 = $dbh->prepare($q1); + my $numeric_regex = '^-?[0-9]+([,.][0-9]+)?$'; - $h->execute($c->stash->{trial_id}, $numeric_regex, $rel_type_id, $stock_type_id, $trial_stock_type_id); + + # print STDERR "TRIAL ID = ".$c->stash->{trial_id}." 
REGEX: $numeric_regex REL_TYPE_ID $rel_type_id STOCK TYPE ID $stock_type_id DATE PLACE HOLDERS: ".join(", ", @date_placeholders)."\n"; + + $h1->execute($c->stash->{trial_id}, $numeric_regex, $rel_type_id, $stock_type_id, $trial_stock_type_id, @date_placeholders); my @phenotype_data; my @numeric_trait_ids; - while (my ($trait, $trait_id, $count, $average, $max, $min, $stddev, $stock_name, $stock_id) = $h->fetchrow_array()) { + while (my ($trait, $trait_id, $count, $average, $max, $min, $stddev, $stock_name, $stock_id) = $h1->fetchrow_array()) { push @numeric_trait_ids, $trait_id; @@ -492,10 +532,11 @@ sub phenotype_summary : Chained('trial') PathPart('phenotypes') Args(0) { $exclude_numeric_trait_ids = " AND cvterm.cvterm_id NOT IN (".join(",", @numeric_trait_ids).")"; } + # print STDERR "run the non-numeric query\n"; my $q = "SELECT (((cvterm.name::text || '|'::text) || db.name::text) || ':'::text) || dbxref.accession::text AS trait, cvterm.cvterm_id, count(phenotype.value) - $select_clause_additional + $select_clause_additional FROM cvterm JOIN phenotype ON (cvterm_id=cvalue_id) JOIN nd_experiment_phenotype USING(phenotype_id) @@ -509,14 +550,17 @@ sub phenotype_summary : Chained('trial') PathPart('phenotypes') Args(0) { AND stock_relationship.type_id=? AND plot.type_id=? AND accession.type_id=? 
+ $date_params $exclude_numeric_trait_ids GROUP BY (((cvterm.name::text || '|'::text) || db.name::text) || ':'::text) || dbxref.accession::text, cvterm.cvterm_id $group_by_additional ORDER BY cvterm.name ASC $order_by_additional "; + # print STDERR "QUERY = $q\n"; + my $h = $dbh->prepare($q); - $h->execute($c->stash->{trial_id}, $rel_type_id, $stock_type_id, $trial_stock_type_id); + $h->execute($c->stash->{trial_id}, $rel_type_id, $stock_type_id, $trial_stock_type_id, @date_placeholders); while (my ($trait, $trait_id, $count, $stock_name, $stock_id) = $h->fetchrow_array()) { my @return_array; @@ -532,8 +576,10 @@ sub trait_histogram : Chained('trial') PathPart('trait_histogram') Args(1) { my $c = shift; my $trait_id = shift; my $stock_type = $c->req->param('stock_type') || 'plot'; + my $start_date = $c->req->param('start_date'); + my $end_date = $c->req->param('end_date'); - my @data = $c->stash->{trial}->get_phenotypes_for_trait($trait_id, $stock_type); + my @data = $c->stash->{trial}->get_phenotypes_for_trait($trait_id, $stock_type, $start_date, $end_date); $c->stash->{rest} = { data => \@data }; } @@ -5437,5 +5483,40 @@ sub delete_all_genotyping_plates_in_project : Chained('trial') PathPart('delete_ $c->stash->{rest} = { success => 1 }; } +sub trial_collect_date_range :Chained('trial') :PathPart('collect_date_range') Args(0) { + my $self = shift; + my $c = shift; + my $trial_id = $c->stash->{trial_id}; + my $cvterm_id = $c->req->param('cvterm_id'); + + my $cvterm_clause = ""; + + if ($cvterm_id) { + $cvterm_clause = " and cvterm_id = ?"; + } + + my $q = "select min(collect_date), max(collect_date), project_id from nd_experiment_project join nd_experiment_phenotype using(nd_experiment_id) join phenotype using(phenotype_id) join cvterm on(cvalue_id=cvterm_id) where nd_experiment_project.project_id=? 
$cvterm_clause group by nd_experiment_project.project_id"; + my $dbh = $c->dbc->dbh; + my $h = $dbh->prepare($q); + if ($cvterm_id) { + $h->execute($trial_id, $cvterm_id); + } + else { + $h->execute($trial_id); + } + + my ($start_date, $end_date, $project_id) = $h->fetchrow_array(); + + if (! $project_id) { + $c->stash->{rest} = { error => "Trial with id $trial_id does not exist" }; + return; + } + + # print STDERR "collect_date_range: START DATE $start_date, END DATE $end_date\n"; + $c->stash->{rest} = { trial_id => $trial_id, + start_date => $start_date, + end_date => $end_date, + }; +} 1; diff --git a/lib/SGN/Controller/BreedersToolbox/Download.pm b/lib/SGN/Controller/BreedersToolbox/Download.pm index 3b11f2e111..02b8c7d392 100644 --- a/lib/SGN/Controller/BreedersToolbox/Download.pm +++ b/lib/SGN/Controller/BreedersToolbox/Download.pm @@ -185,7 +185,7 @@ sub breeder_download : Path('/breeders/download/') Args(0) { sub _parse_list_from_json { my $list_json = shift; # print STDERR "LIST JSON: ". Dumper $list_json; - my $json = new JSON; + my $json = JSON->new(); if ($list_json) { # my $decoded_list = $json->allow_nonref->relaxed->escape_slash->loose->allow_singlequote->allow_barekey->decode($list_json); my $decoded_list = decode_json($list_json); @@ -222,6 +222,7 @@ sub download_phenotypes_action : Path('/breeders/trials/phenotype/download') Arg my $search_type = $c->req->param("speed") && $c->req->param("speed") ne 'null' ? $c->req->param("speed") : "Native"; my $format = $c->req->param("format") && $c->req->param("format") ne 'null' ? $c->req->param("format") : "xlsx"; my $data_level = $c->req->param("dataLevel") && $c->req->param("dataLevel") ne 'null' ? $c->req->param("dataLevel") : "plot"; + my $repetitive_measurements = $c->req->param("repetitive_measurements") || "average"; my $timestamp_option = $c->req->param("timestamp") && $c->req->param("timestamp") ne 'null' ? 
$c->req->param("timestamp") : 0; my $entry_numbers_option = $c->req->param("entry_numbers") && $c->req->param("entry_numbers") ne 'null' ? $c->req->param("entry_numbers") : 0; my $exclude_phenotype_outlier = $c->req->param("exclude_phenotype_outlier") && $c->req->param("exclude_phenotype_outlier") ne 'null' && $c->req->param("exclude_phenotype_outlier") ne 'undefined' ? $c->req->param("exclude_phenotype_outlier") : 0; @@ -237,6 +238,8 @@ sub download_phenotypes_action : Path('/breeders/trials/phenotype/download') Arg my $trait_contains = $c->req->param("trait_contains"); my $phenotype_min_value = $c->req->param("phenotype_min_value") && $c->req->param("phenotype_min_value") ne 'null' ? $c->req->param("phenotype_min_value") : ""; my $phenotype_max_value = $c->req->param("phenotype_max_value") && $c->req->param("phenotype_max_value") ne 'null' ? $c->req->param("phenotype_max_value") : ""; + my $phenotype_start_date = $c->req->param("phenotype_start_date"); + my $phenotype_end_date = $c->req->param("phenotype_end_date"); my @trait_list; if ($trait_list && $trait_list ne 'null') { @@ -415,9 +418,13 @@ sub download_phenotypes_action : Path('/breeders/trials/phenotype/download') Arg phenotype_min_value => $phenotype_min_value, phenotype_max_value => $phenotype_max_value, has_header => $has_header, - search_type => $search_type + search_type => $search_type, + repetitive_measurements => $repetitive_measurements, + phenotype_start_date => $phenotype_start_date, + phenotype_end_date => $phenotype_end_date, }); + # print STDERR "Repetitive_measurements option recieved" .$repetitive_measurements ."\n"; my $error = $download->download(); $c->res->content_type('Application/'.$format); @@ -430,7 +437,7 @@ sub download_phenotypes_action : Path('/breeders/trials/phenotype/download') Arg #Deprecated. 
Look to download_phenotypes_action -#sub download_trial_phenotype_action : Path('/breeders/trial/phenotype/download') Args(1) { +#sub download_trial_phenotype_action : Path('/breeders/trial/phenotype/downoad') Args(1) { # my $self = shift; # my $c = shift; # my $trial_id = shift; @@ -584,7 +591,7 @@ sub download_action : Path('/breeders/download_action') Args(0) { accession_list=>$accession_id_data->{transform}, include_timestamp=>$timestamp_included, exclude_phenotype_outlier=>$exclude_phenotype_outlier, - dataset_exluded_outliers=>$outliers, + dataset_excluded_outliers=>$outliers, data_level=>$datalevel, ); @data = $phenotypes_search->get_phenotype_matrix(); @@ -627,26 +634,26 @@ sub download_action : Path('/breeders/download_action') Args(0) { if ($format eq ".csv") { #build csv with column names - open(CSV, "> :encoding(UTF-8)", $tempfile) || die "Can't open file $tempfile\n"; - my @header = @{$data[0]}; - my $num_col = scalar(@header); - for (my $line =0; $line< @data; $line++) { - my @columns = @{$data[$line]}; - my $step = 1; - for(my $i=0; $i<$num_col; $i++) { - if (defined($columns[$i])) { - print CSV "\"$columns[$i]\""; - } else { - print CSV "\"\""; - } - if ($step < $num_col) { - print CSV ","; - } - $step++; + open(my $csv_fh, "> :encoding(UTF-8)", $tempfile) || die "Can't open file $tempfile\n"; + my @header = @{$data[0]}; + my $num_col = scalar(@header); + for (my $line =0; $line< @data; $line++) { + my @columns = @{$data[$line]}; + my $step = 1; + for(my $i=0; $i<$num_col; $i++) { + if (defined($columns[$i])) { + print $csv_fh "\"$columns[$i]\""; + } else { + print $csv_fh "\"\""; } - print CSV "\n"; + if ($step < $num_col) { + print $csv_fh ","; + } + $step++; } - close CSV; + print $csv_fh "\n"; + } + close $csv_fh; } else { my $ss = Excel::Writer::XLSX->new($tempfile); @@ -762,7 +769,7 @@ sub download_accession_properties_action : Path('/breeders/download_accession_pr my $file_name = basename($file_path); # Write to csv file - open(CSV, "> 
:encoding(UTF-8)", $file_path) || die "Can't open file $file_path\n"; + open(my $csv_fh, "> :encoding(UTF-8)", $file_path) || die "Can't open file $file_path\n"; my @header = @{$rows->[0]}; my $num_col = scalar(@header); @@ -771,18 +778,18 @@ sub download_accession_properties_action : Path('/breeders/download_accession_pr my $step = 1; for ( my $i = 0; $i < $num_col; $i++ ) { if ($columns->[$i]) { - print CSV "\"$columns->[$i]\""; + print $csv_fh "\"$columns->[$i]\""; } else { - print CSV "\"\""; + print $csv_fh "\"\""; } if ($step < $num_col) { - print CSV ","; + print $csv_fh ","; } $step++; } - print CSV "\n"; + print $csv_fh "\n"; } - close CSV; + close $csv_fh; # Return the csv file $c->res->content_type('text/csv'); diff --git a/lib/SGN/Controller/Cvterm.pm b/lib/SGN/Controller/Cvterm.pm index b690ba5468..5c4441111c 100644 --- a/lib/SGN/Controller/Cvterm.pm +++ b/lib/SGN/Controller/Cvterm.pm @@ -44,7 +44,7 @@ sub view_cvterm : Chained('get_cvterm') PathPart('view') Args(0) { $submitter = $logged_user->check_roles('submitter') if $logged_user; $sequencer = $logged_user->check_roles('sequencer') if $logged_user; my $props = $self->_cvtermprops($cvterm); - my $editable_cvterm_props = "trait_format,trait_default_value,trait_minimum,trait_maximum,trait_details,trait_categories"; + my $editable_cvterm_props = "trait_format,trait_default_value,trait_minimum,trait_maximum,trait_details,trait_categories,trait_repeat_type"; $c->stash( diff --git a/mason/breeders_toolbox/breeder_search_page.mas b/mason/breeders_toolbox/breeder_search_page.mas index e231c72751..c67b6d271a 100644 --- a/mason/breeders_toolbox/breeder_search_page.mas +++ b/mason/breeders_toolbox/breeder_search_page.mas @@ -6,6 +6,9 @@ $dataset_id => undef <& /util/import_javascript.mas, entries => ["wizard"] &> <& /util/import_css.mas, paths => ['wizard.css'] &> + + + <& /page/page_title.mas, title=>"Search Wizard" &>
    @@ -394,7 +397,7 @@ $dataset_id => undef
    @@ -402,22 +405,48 @@ $dataset_id => undef
    - +
    + + Repetitive measurements:
    + + + + +   + + + + From: + + + To: + + + include items without a date + + Trait Name Contains @@ -774,4 +803,38 @@ $dataset_id => undef $(".wizard-download-genotypes-chromosome-number").html(html); } + jQuery('input[title="phenotype_start_date"]').daterangepicker( + { + "singleDatePicker": true, + "showDropdowns": true, + "autoUpdateInput": true, + "startDate": "1960-01-01", + "yearSelect" : true, + "minDate": "1960-01-01", + "maxDate": "2030-12-31", + locale: { + format: 'YYYY-MM-DD' + } + }, + +); + + +jQuery('input[title="phenotype_end_date"]').daterangepicker( + { + "singleDatePicker": true, + "showDropdowns": true, + "autoUpdateInput": true, + //"startDate": "1960-01-01", + "yearSelect" : true, + "minDate": "1960-01-01", + "maxDate": "2030-12-31", + locale: { + format: 'YYYY-MM-DD' + } + }, + +); + + diff --git a/mason/breeders_toolbox/trial.mas b/mason/breeders_toolbox/trial.mas index 375de9e676..52508db4f9 100644 --- a/mason/breeders_toolbox/trial.mas +++ b/mason/breeders_toolbox/trial.mas @@ -102,9 +102,11 @@ $project_id => undef <& /page/detail_page_2_col_section.mas, trial_id => $trial_id, trial_name => $trial_name, info_section_title => "

    Experimental Design

    ", info_section_subtitle => 'View and add experimental design information. Add plant entries and tissue sample entries.', buttons_html => ' ', icon_class => "glyphicon glyphicon-list-alt", info_section_id => "trial_design_section", has_plant_entries => $has_plant_entries, has_subplot_entries => $has_subplot_entries, has_tissue_sample_entries => $has_tissue_sample_entries, trial_stock_type => $trial_stock_type &> +<& /page/detail_page_2_col_section.mas, trial_id => $trial_id, info_section_title => "

    Phenotype Raw Data

    ", info_section_subtitle => 'View phenotyping raw data.', info_section_id => "trial_raw_data", trial_name => $trial_name, site_project_name => $site_project_name, sgn_session_id => $sgn_session_id, user_name => $user_name, main_production_site_url => $main_production_site_url, trial_stock_type => $trial_stock_type &> + <& /page/detail_page_2_col_section.mas, trial_id => $trial_id, info_section_title => "

    Phenotype Summary Statistics

    ", info_section_subtitle => 'View and download uploaded phenotype data.', buttons_html => "", icon_class => "glyphicon glyphicon-equalizer", info_section_id => "trial_detail_traits_assayed", has_expression_atlas => $has_expression_atlas, trial_name => $trial_name, expression_atlas_url => $expression_atlas_url, site_project_name => $site_project_name, sgn_session_id => $sgn_session_id, user_name => $user_name, main_production_site_url => $main_production_site_url, trial_stock_type => $trial_stock_type &> -<& /page/detail_page_2_col_section.mas, trial_id => $trial_id, info_section_title => "

    Phenotype Raw Data

    ", info_section_subtitle => 'View phenotyping raw data.', info_section_id => "trial_raw_data", trial_name => $trial_name, site_project_name => $site_project_name, sgn_session_id => $sgn_session_id, user_name => $user_name, main_production_site_url => $main_production_site_url, trial_stock_type => $trial_stock_type &> +<& /page/detail_page_2_col_section.mas, trial_id => $trial_id, trial_name => $trial_name, site_project_name => $site_project_name, sgn_session_id => $sgn_session_id, user_name => $user_name, main_production_site_url => $main_production_site_url, trial_stock_type => $trial_stock_type, info_section_title => "

    Repetitive Measurements

    ", info_section_subtitle => 'View a plot by repetitive measurements level', info_section_id => "repetitive_measurements_data", icon_class => "glyphicon glyphicon-repeat" &> <& /page/detail_page_2_col_section.mas, trial_id => $trial_id, info_section_title => "

    Compute New Phenotypes

    ", info_section_subtitle => 'Compute derived traits or compute plot phenotypes from plant phenotypes.', icon_class => "glyphicon glyphicon-floppy-save", info_section_id => "compute_derived_traits" &> diff --git a/mason/breeders_toolbox/trial/download_phenotypes_dialog.mas b/mason/breeders_toolbox/trial/download_phenotypes_dialog.mas index 79e07ae2c1..9e5a9a40ae 100644 --- a/mason/breeders_toolbox/trial/download_phenotypes_dialog.mas +++ b/mason/breeders_toolbox/trial/download_phenotypes_dialog.mas @@ -65,6 +65,20 @@ $dialog_name => undef <&| /page/info_section.mas, title=>"Additional Search Options", collapsible => 1, collapsed=>1 &> +
    + +
    + +
    +
    +
    @@ -151,6 +165,7 @@ jQuery(document).ready(function() { var trial_id_array = trial_ids.split(","); var speed = jQuery("#download_trial_phenotypes_speed").val(); var format = jQuery("#download_trial_phenotypes_format").val(); + var repetitive_measurements = jQuery("#download_trial_phenotypes_repetitive_measurements_option").val(); var timestamp = jQuery("#download_trial_phenotypes_timestamp_option").val(); var entry_numbers = jQuery("#download_trial_phenotypes_entry_numbers_option").val(); var trait_contains = jQuery("#download_trial_phenotype_trait_contains").val(); @@ -162,7 +177,7 @@ jQuery(document).ready(function() { var phenotype_max_value = jQuery("#download_trial_phenotype_phenotype_max").val(); var exclude_phenotype_outlier = jQuery("#download_trial_phenotypes_exclude_outliers").val(); - window.open("/breeders/trials/phenotype/download?trial_list="+JSON.stringify(trial_id_array)+"&speed="+speed+"&format="+format+"&timestamp="+timestamp+"&entry_numbers="+entry_numbers+"&trait_contains="+JSON.stringify(trait_contains_array)+"&trait_component_list="+JSON.stringify(trait_components)+"&trait_list="+JSON.stringify(traits)+"&dataLevel="+data_level+"&phenotype_min_value="+phenotype_min_value+"&phenotype_max_value="+phenotype_max_value+"&exclude_phenotype_outlier="+exclude_phenotype_outlier); + window.open("/breeders/trials/phenotype/download?trial_list="+JSON.stringify(trial_id_array)+"&speed="+speed+"&format="+format+"&timestamp="+timestamp+"&entry_numbers="+entry_numbers+"&trait_contains="+JSON.stringify(trait_contains_array)+"&trait_component_list="+JSON.stringify(trait_components)+"&trait_list="+JSON.stringify(traits)+"&dataLevel="+data_level+"&phenotype_min_value="+phenotype_min_value+"&phenotype_max_value="+phenotype_max_value+"&exclude_phenotype_outlier="+exclude_phenotype_outlier+"&repetitive_measurements="+repetitive_measurements); }); diff --git a/mason/breeders_toolbox/trial/download_trials_phenotypes_dialog.mas 
b/mason/breeders_toolbox/trial/download_trials_phenotypes_dialog.mas index d7496ce686..6fd1048376 100644 --- a/mason/breeders_toolbox/trial/download_trials_phenotypes_dialog.mas +++ b/mason/breeders_toolbox/trial/download_trials_phenotypes_dialog.mas @@ -35,6 +35,21 @@ <&| /page/info_section.mas, title=>"Additional Search Options", collapsible => 1, collapsed=>1 &> +
    + +
    + +
    +
    + +
    @@ -144,6 +159,7 @@ jQuery(document).ready(function() { var trial_ids = selected.join(","); var trial_id_array = trial_ids.split(","); var format = jQuery("#download_trials_phenotypes_format").val(); + var repetitive_measurements = jQuery("#download_repetitive_measurements_trials_option").val(); var timestamp = jQuery("#download_trials_phenotypes_timestamp_option").val(); var data_level = jQuery("#download_trials_phenotypes_level_option").val(); var trait_contains = jQuery("#download_trials_phenotype_trait_contains").val(); @@ -170,7 +186,7 @@ jQuery(document).ready(function() { } if (selected.length !== 0) { - window.open("/breeders/trials/phenotype/download?trial_list="+JSON.stringify(trial_id_array)+"&format="+format+"&timestamp="+timestamp+"&trait_contains="+JSON.stringify(trait_contains_array)+"&trait_list="+JSON.stringify(traits)+"&accession_list="+JSON.stringify(accessions)+"&plot_list="+JSON.stringify(plots)+"&plant_list="+JSON.stringify(plants)+"&dataLevel="+data_level+"&phenotype_min_value="+phenotype_min_value+"&phenotype_max_value="+phenotype_max_value+"&exclude_phenotype_outlier="+exclude_phenotype_outlier); + window.open("/breeders/trials/phenotype/download?trial_list="+JSON.stringify(trial_id_array)+"&format="+format+"&timestamp="+timestamp+"&trait_contains="+JSON.stringify(trait_contains_array)+"&trait_list="+JSON.stringify(traits)+"&accession_list="+JSON.stringify(accessions)+"&plot_list="+JSON.stringify(plots)+"&plant_list="+JSON.stringify(plants)+"&dataLevel="+data_level+"&phenotype_min_value="+phenotype_min_value+"&phenotype_max_value="+phenotype_max_value+"&exclude_phenotype_outlier="+exclude_phenotype_outlier+"&repetitive_measurements="+repetitive_measurements); } else { alert("No leaf nodes selected for download."); } }); diff --git a/mason/breeders_toolbox/trial/phenotype_summary.mas b/mason/breeders_toolbox/trial/phenotype_summary.mas index c43950d6e0..9f89ca3bfb 100644 --- a/mason/breeders_toolbox/trial/phenotype_summary.mas +++ 
b/mason/breeders_toolbox/trial/phenotype_summary.mas @@ -22,6 +22,25 @@ $trial_stock_type => undef shape-rendering: crispEdges; } +.form-group { + display: flex; + align-items: center; +} + +.form-group label { + margin-right: 10px; + white-space: nowrap; +} + +.form-group input, +.form-group select { + flex-grow: 1; +} + +input[type="text"] { + width: 100px; +} + @@ -30,9 +49,17 @@ $trial_stock_type => undef
    -
    - -
    +
    +
    + + +
    +
    + + +
    +
    + + + + + + +
    + + + + + + + + + + + + +
    + + +
    +
    diff --git a/mason/page/detail_page_2_col_section.mas b/mason/page/detail_page_2_col_section.mas index 02abead7d5..1baf418e76 100644 --- a/mason/page/detail_page_2_col_section.mas +++ b/mason/page/detail_page_2_col_section.mas @@ -199,6 +199,9 @@ $field_headers => () <& /breeders_toolbox/trial/trial_raw_data.mas, trial_id => $trial_id, trial_stock_type => $trial_stock_type &> % } +% if ($info_section_id eq 'repetitive_measurements_data') { + <& /breeders_toolbox/trial/repetitive_measurements_data.mas, trial_id => $trial_id, trial_stock_type =>$trial_stock_type &> +% } % if ($info_section_id eq 'compute_derived_traits'){ <& /breeders_toolbox/trial/derived_trait.mas, trial_id => $trial_id &> diff --git a/t/lib/SGN/Test/Fixture.pm b/t/lib/SGN/Test/Fixture.pm index 39997138f8..35a4ec895e 100644 --- a/t/lib/SGN/Test/Fixture.pm +++ b/t/lib/SGN/Test/Fixture.pm @@ -161,7 +161,7 @@ sub dbic_schema { return $self->people_schema(); } - return undef; + return; } sub get_conf { @@ -197,6 +197,11 @@ sub get_db_stats { $rs = $self->bcs_schema()->resultset('Cv::Cvterm')->search( {}, { columns => [ { 'cvterm_id_max' => { max => 'cvterm_id' }} ] } ); $stats->{cvterms} = $rs->get_column('cvterm_id_max')->first(); + # count cvtermprops + # + $rs = $self->bcs_schema()->resultset('Cv::Cvtermprop')->search( {}, { columns => [ { 'cvtermprop_id_max' => { max => 'cvtermprop_id' }} ] } ); + $stats->{cvtermprops} = $rs->get_column('cvtermprop_id_max')->first(); + # count users # $rs = $self->people_schema()->resultset('SpPerson')->search( {}, { columns => [ { 'sp_person_id_max' => { max => 'sp_person_id' }} ] } ); @@ -303,7 +308,7 @@ sub clean_up_db { if (! 
defined($self->dbstats_start())) { print STDERR "Can't clean up becaues dbstats were not run at the beginning of the test!\n"; } - my @deletion_order = ('stock_owners', 'stock_relationships', 'stockprops', 'stocks', 'project_owners', 'project_relationships', 'projectprops', 'project_images', 'projects', 'cvterms', 'datasets', 'list_elements', 'lists', 'phenotypes', 'genotypes', 'locations', 'protocols', 'metadata_files', 'metadata', 'experiment_files', 'experiment_json', 'experiments'); + my @deletion_order = ('stock_owners', 'stock_relationships', 'stockprops', 'stocks', 'project_owners', 'project_relationships', 'projectprops', 'project_images', 'projects', 'cvterms', 'cvtermprops', 'datasets', 'list_elements', 'lists', 'phenotypes', 'genotypes', 'locations', 'protocols', 'metadata_files', 'metadata', 'experiment_files', 'experiment_json', 'experiments'); foreach my $table (@deletion_order) { print STDERR "CLEANING $table...\n"; my $count = $stats->{$table} - $self->dbstats_start()->{$table}; @@ -344,6 +349,10 @@ sub delete_table_entries { $rs = $self->bcs_schema()->resultset('Cv::Cvterm')->search( { cvterm_id => { '>' => $previous_max_id }} ); } + if ($table eq "cvtermprops") { + $rs = $self->bcs_schema()->resultset('Cv::Cvtermprop')->search( { cvtermprop_id => { '>' => $previous_max_id }} ); + } + if ($table eq "people") { $rs = $self->people_schema()->resultset('SpPerson')->search( { sp_person_id => { '>' => $previous_max_id } } ); } diff --git a/t/selenium2/breeders/upload_phenotype/fieldbook_phenotype_upload.t b/t/selenium2/breeders/upload_phenotype/fieldbook_phenotype_upload.t index 6edc47fe4c..b6c25e26fa 100644 --- a/t/selenium2/breeders/upload_phenotype/fieldbook_phenotype_upload.t +++ b/t/selenium2/breeders/upload_phenotype/fieldbook_phenotype_upload.t @@ -1,7 +1,7 @@ +use strict; +use warnings; use lib 't/lib'; - use Test::More 'tests' => 57; - use SGN::Test::WWW::WebDriver; use SGN::Test::Fixture; @@ -14,6 +14,8 @@ $t->while_logged_in_as("submitter", 
sub { $t->get_ok('/breeders/trial/137'); sleep(4); + $t->wait_for_working_dialog(); + $t->find_element_ok("trial_upload_files_onswitch", "id", "click on upload_fieldbook_link ")->click(); sleep(1); @@ -30,6 +32,7 @@ $t->while_logged_in_as("submitter", sub { $t->find_element_ok("upload_fieldbook_phenotype_data_level", "id", "find fieldbook phenotype data level select")->click(); sleep(1); + $t->find_element_ok('//select[@id="upload_fieldbook_phenotype_data_level"]/option[@value="plots"]', 'xpath', "Select 'plots' as value of phenotype data level")->click(); my $filename = $f->config->{basepath}."/t/data/fieldbook/fieldbook_phenotype_file_no_fieldbook_image.csv"; @@ -61,12 +64,17 @@ $t->while_logged_in_as("submitter", sub { ok($verify_status =~ /Metadata saved for archived file./, "Verify the positive store validation"); ok($verify_status =~ /Upload Successfull!/, "Verify the positive store validation"); + #back to the trial page and re-upload !! $t->get_ok('/breeders/trial/137'); sleep(2); + $t->wait_for_working_dialog(); + $t->find_element_ok("trial_upload_files_onswitch", "id", "click on upload_fieldbook_link ")->click(); sleep(2); + $t->wait_for_working_dialog(); + $t->find_element_ok("upload_fieldbook_phenotypes_link", "id", "click on upload_spreadsheet_link ")->click(); sleep(4); @@ -83,7 +91,7 @@ $t->while_logged_in_as("submitter", sub { $t->find_element_ok("upload_fieldbook_phenotype_submit_verify", "id", "submit spreadsheet file for verification")->click(); sleep(3); - + $verify_status = $t->find_element_ok( "upload_phenotype_fieldbook_verify_status", "id", "verify the verification")->get_attribute('innerHTML'); @@ -100,9 +108,8 @@ $t->while_logged_in_as("submitter", sub { $verify_status = $t->find_element_ok( "upload_phenotype_fieldbook_verify_status", "id", "verify the verification")->get_attribute('innerHTML'); - ok($verify_status =~ /0 new values stored/, "Verify warnings after store validation"); - ok($verify_status =~ /28 previously stored values 
skipped/, "Verify warnings after store validation"); + ok($verify_status =~ /30 previously stored values skipped/, "Verify warnings after store validation"); ok($verify_status =~ /0 previously stored values overwritten/, "Verify warnings after store validation"); ok($verify_status =~ /0 previously stored values removed/, "Verify warnings after store validation"); ok($verify_status =~ /Upload Successfull!/, "Verify warnings after store validation"); @@ -131,6 +138,7 @@ $t->while_logged_in_as("submitter", sub { "upload_phenotype_fieldbook_verify_status", "id", "verify the verification")->get_attribute('innerHTML'); + #check for warnings after the store_validation ok($verify_status =~ /File data successfully parsed/, "Verify warnings after store validation"); ok($verify_status =~ /File data verified. Plot names and trait names are valid./, "Verify warnings after store validation"); ok($verify_status =~ /Warnings are shown in yellow. Either fix the file and try again/, "Verify warnings after store validation"); @@ -145,7 +153,7 @@ $t->while_logged_in_as("submitter", sub { "id", "verify the verification")->get_attribute('innerHTML'); ok($verify_status =~ /0 new values stored/, "Verify warnings after store validation"); - ok($verify_status =~ /28 previously stored values skipped/, "Verify warnings after store validation"); + ok($verify_status =~ /30 previously stored values skipped/, "Verify warnings after store validation"); ok($verify_status =~ /0 previously stored values overwritten/, "Verify warnings after store validation"); ok($verify_status =~ /0 previously stored values removed/, "Verify warnings after store validation"); ok($verify_status =~ /Upload Successfull!/, "Verify warnings after store validation"); diff --git a/t/selenium2/breeders/upload_phenotype/upload_datacollector_spreadsheet.t b/t/selenium2/breeders/upload_phenotype/upload_datacollector_spreadsheet.t index d102b91ade..d4c372efa8 100644 --- 
a/t/selenium2/breeders/upload_phenotype/upload_datacollector_spreadsheet.t +++ b/t/selenium2/breeders/upload_phenotype/upload_datacollector_spreadsheet.t @@ -1,4 +1,5 @@ - +use strict; +use warnings; use lib 't/lib'; use Test::More 'tests' => 39; @@ -17,10 +18,14 @@ $t->while_logged_in_as("submitter", sub { $t->get_ok('/breeders/trial/137'); sleep(3); - my $trail_files_onswitch = $t->find_element_ok("trial_upload_files_onswitch", "id", "find and open 'trial upload files onswitch' and click"); - $trail_files_onswitch->click(); + $t->wait_for_working_dialog(); + + my $trial_files_onswitch = $t->find_element_ok("trial_upload_files_onswitch", "id", "find and open 'trial upload files onswitch' and click"); + $trial_files_onswitch->click(); sleep(2); + $t->wait_for_working_dialog(); + $t->find_element_ok("upload_datacollector_phenotypes_link", "id", "click on upload_trial_link ")->click(); sleep(2); @@ -65,10 +70,14 @@ $t->while_logged_in_as("submitter", sub { $t->get_ok('/breeders/trial/137'); sleep(3); - my $trail_files_onswitch = $t->find_element_ok("trial_upload_files_onswitch", "id", "find and open 'trial upload files onswitch' and click"); - $trail_files_onswitch->click(); + $t->wait_for_working_dialog(); + + my $trial_files_onswitch = $t->find_element_ok("trial_upload_files_onswitch", "id", "find and open 'trial upload files onswitch' and click"); + $trial_files_onswitch->click(); sleep(2); + $t->wait_for_working_dialog(); + $t->find_element_ok("upload_datacollector_phenotypes_link", "id", "click on upload_spreadsheet_link ")->click(); sleep(2); @@ -91,12 +100,12 @@ $t->while_logged_in_as("submitter", sub { $verify_status = $t->find_element_ok( "upload_phenotype_datacollector_verify_status", "id", "verify the verification")->get_attribute('innerHTML'); - + diag("verify_statues : $verify_status"); ok($verify_status =~ /File data successfully parsed/, "Verify warnings after store validation"); ok($verify_status =~ /File data verified. 
Plot names and trait names are valid./, "Verify warnings after store validation"); ok($verify_status =~ /Warnings are shown in yellow. Either fix the file and try again/, "Verify warnings after store validation"); ok($verify_status =~ /To overwrite previously stored values instead/, "Verify warnings after store validation"); - ok($verify_status =~ /There are 57 values in your file that are the same as values already stored in the database./, "Verify warnings after store validation"); + ok($verify_status =~ /There are 44 values in your file that are the same as values already stored in the database./, "Verify warnings after store validation"); $t->find_element_ok("upload_datacollector_phenotype_submit_store", "id", "submit spreadsheet file for storage")->click(); sleep(10); @@ -105,7 +114,7 @@ $t->while_logged_in_as("submitter", sub { "upload_phenotype_datacollector_verify_status", "id", "verify the verification")->get_attribute('innerHTML'); - ok($verify_status =~ /57 previously stored values skipped/, "Verify warnings after store validation"); + ok($verify_status =~ /60 previously stored values skipped/, "Verify warnings after store validation"); ok($verify_status =~ /0 previously stored values overwritten/, "Verify warnings after store validation"); ok($verify_status =~ /Metadata saved for archived file./, "Verify warnings after store validation"); ok($verify_status =~ /0 previously stored values removed/, "Verify warnings after store validation"); diff --git a/t/selenium2/breeders/upload_phenotype/upload_phenotyping_spreadsheet.t b/t/selenium2/breeders/upload_phenotype/upload_phenotyping_spreadsheet.t index 050095694f..8e3edb3943 100644 --- a/t/selenium2/breeders/upload_phenotype/upload_phenotyping_spreadsheet.t +++ b/t/selenium2/breeders/upload_phenotype/upload_phenotyping_spreadsheet.t @@ -1,7 +1,8 @@ +use strict; +use warnings; use lib 't/lib'; use Test::More; - use SGN::Test::WWW::WebDriver; use SGN::Test::Fixture; @@ -16,10 +17,14 @@ 
$t->while_logged_in_as("submitter", sub { $t->get_ok('/breeders/trial/137'); sleep(4); - my $trail_files_onswitch = $t->find_element_ok("trial_upload_files_onswitch", "id", "find and open 'trial upload files onswitch' and click"); - $trail_files_onswitch->click(); + $t->wait_for_working_dialog(); + + my $trial_files_onswitch = $t->find_element_ok("trial_upload_files_onswitch", "id", "find and open 'trial upload files onswitch' and click"); + $trial_files_onswitch->click(); sleep(2); + $t->wait_for_working_dialog(); + $t->find_element_ok("upload_spreadsheet_phenotypes_link", "id", "click on upload_spreadsheet_link ")->click(); sleep(4); @@ -71,10 +76,14 @@ $t->while_logged_in_as("submitter", sub { $t->get_ok('/breeders/trial/137'); sleep(3); - $trail_files_onswitch = $t->find_element_ok("trial_upload_files_onswitch", "id", "find and open 'trial upload files onswitch' and click"); - $trail_files_onswitch->click(); + $t->wait_for_working_dialog(); + + $trial_files_onswitch = $t->find_element_ok("trial_upload_files_onswitch", "id", "find and open 'trial upload files onswitch' and click"); + $trial_files_onswitch->click(); sleep(2); + $t->wait_for_working_dialog(); + $t->find_element_ok("upload_spreadsheet_phenotypes_link", "id", "click on upload_spreadsheet_link ")->click(); sleep(4); @@ -108,7 +117,7 @@ $t->while_logged_in_as("submitter", sub { ok($verify_status =~ /File data verified. Plot names and trait names are valid./, "Verify warnings after store validation"); ok($verify_status =~ /Warnings are shown in yellow. 
Either fix the file and try again/, "Verify warnings after store validation"); ok($verify_status =~ /To overwrite previously stored values instead/, "Verify warnings after store validation"); - ok($verify_status =~ /There are 60 values in your file that are the same as values already stored in the database./, "Verify warnings after store validation"); + ok($verify_status =~ /There are 2 values in your file that are the same as values already stored in the database./, "Verify warnings after store validation"); $t->find_element_ok("upload_spreadsheet_phenotype_submit_store", "id", "submit spreadsheet file for storage")->click(); @@ -162,7 +171,7 @@ $t->while_logged_in_as("submitter", sub { ok($verify_status =~ /File data verified. Plot names and trait names are valid./, "Verify warnings after store validation"); ok($verify_status =~ /Warnings are shown in yellow. Either fix the file and try again/, "Verify warnings after store validation"); ok($verify_status =~ /To overwrite previously stored values instead/, "Verify warnings after store validation"); - ok($verify_status =~ /There are 60 values in your file that are the same as values already stored in the database./, "Verify warnings after store validation"); + ok($verify_status =~ /There are 2 values in your file that are the same as values already stored in the database./, "Verify warnings after store validation"); $t->find_element_ok("upload_spreadsheet_phenotype_submit_store", "id", "submit spreadsheet file for storage")->click(); sleep(10); diff --git a/t/unit_fixture/CXGN/Phenotype/PhenotypeMatrix.t b/t/unit_fixture/CXGN/Phenotype/PhenotypeMatrix.t new file mode 100644 index 0000000000..be64467931 --- /dev/null +++ b/t/unit_fixture/CXGN/Phenotype/PhenotypeMatrix.t @@ -0,0 +1,130 @@ + + +use strict; +use Test::More; +use Data::Dumper; +use lib 't/lib'; +use SGN::Test::Fixture; + +use CXGN::Phenotypes::PhenotypeMatrix; + +my $f = SGN::Test::Fixture->new(); + +my $dbh = $f->dbh(); + +print STDERR "Inserting a 
duplicate measurement...\n"; +# +my $q = "insert into phenotype (cvalue_id, value, uniquename) values (70773, '3.0', 'fresh shoot weight date: 2024-03-01_19:20:56 operator = test_operator_323')"; + +my $h = $dbh->prepare($q); +$h->execute(); + +my $q2 = "insert into cvtermprop (cvterm_id, type_id, value) values (70773, (select cvterm_id from cvterm where name='trait_repeat_type'), 'multiple')"; + +my $h2 = $dbh->prepare($q2); +$h2->execute(); + +my $q3 = "insert into nd_experiment_phenotype (nd_experiment_id, phenotype_id) values (76184, (select phenotype_id FROM phenotype where uniquename ilike '%test_operator_323%'))"; + +my $h3= $dbh->prepare($q3); +$h3->execute(); + +my $q4 = "select phenotype_id, cvalue_id, value from phenotype where value='3.0'"; + +my $h4 = $dbh->prepare($q4); +$h4->execute(); + +while (my ($phenotype_id, $cvalue_id, $value) = $h4->fetchrow_array()) { + print STDERR "PHENOTYPE_ID $phenotype_id CVALUE_ID $cvalue_id VALUE $value\n"; + +} + +my $dbhost = $f->config->{dbhost}; +my $dbname = $f->config->{dbname}; +my $dbpass = $f->config->{dbpass}; + +print STDERR "Running matview refresh with -H $dbhost -D $dbname -U postgres -P $dbpass -m phenotypes\n"; +system("perl bin/refresh_matviews.pl -H $dbhost -D $dbname -U postgres -P $dbpass -m phenotypes"); + + + +print STDERR "Downloading data...\n"; + +my $phenotypes_search = CXGN::Phenotypes::PhenotypeMatrix->new( + bcs_schema=>$f->bcs_schema(), + search_type=> 'MaterializedViewTable', + data_level=> 'plot', + trial_list=> [ 139 ], + trait_list=> [ 70773 ], + repetitive_measurements => 'average', +# # program_list=>$self->program_list, + # folder_list=>$self->folder_list, + # year_list=>$year_list, + # location_list=>$location_list, + # accession_list=>$accession_list, + # plot_list=>$plot_list, + # plant_list=>$plant_list, + # include_timestamp=>$include_timestamp, + # include_pedigree_parents=>$include_pedigree_parents, + # exclude_phenotype_outlier=>0, + # 
dataset_excluded_outliers=>$dataset_excluded_outliers, + # trait_contains=>$trait_contains, + # phenotype_min_value=>$phenotype_min_value, + # phenotype_max_value=>$phenotype_max_value, + # start_date => $start_date, + # end_date => $end_date, + # include_dateless_items => $include_dateless_items, + # limit=>$limit, offset=>$offset + ); + + +my @results = ( '2.25', '1.5|3.0', '1.5', '3.0', '4.5' ); + +my $search_type = 'MaterializedViewTable'; +foreach my $repetitive_measurements ('average', 'all_values_single_line', 'first', 'last', 'sum') { + + $phenotypes_search->search_type($search_type); + $phenotypes_search->repetitive_measurements($repetitive_measurements); + + my @data = $phenotypes_search->get_phenotype_matrix(); + + foreach my $d (@data) { + if (my @out = grep( /KASESE_TP2013_1619/, @$d )) { + my $result = shift(@results); + print STDERR "$search_type, $repetitive_measurements, GOT: $d->[39], EXPECTED: $result\n"; + + is( $d->[39], $result, "test $result" ); + #print STDERR "MATCHED: ".Dumper($d); + } + } +} + + +@results = ( '2.25', '1.5|3.0', '1.5', '3.0', '4.5'); +$search_type = "Native"; + +foreach my $repetitive_measurements ('average', 'all_values_single_line', 'first', 'last', 'sum') { + + $phenotypes_search->search_type($search_type); + $phenotypes_search->repetitive_measurements($repetitive_measurements); + + + my @data = $phenotypes_search->get_phenotype_matrix(); + + foreach my $d (@data) { + if (my @out = grep( /KASESE_TP2013_1619/, @$d )) { + my $result = shift(@results); + print STDERR "$search_type, $repetitive_measurements, GOT: $d->[30], EXPECTED: $result\n"; + + is( $d->[30], $result, "test $result" ); + #print STDERR "MATCHED: ".Dumper($d); + } + } +} + +$f->clean_up_db(); + +system("perl bin/refresh_matviews.pl -H $dbhost -D $dbname -U postgres -P $dbpass -m phenotypes"); + + +done_testing(); diff --git a/t/unit_fixture/CXGN/Trial/MultiTrialCreate.t b/t/unit_fixture/CXGN/Trial/MultiTrialCreate.t index beeca88826..a036384305 100644 --- 
a/t/unit_fixture/CXGN/Trial/MultiTrialCreate.t +++ b/t/unit_fixture/CXGN/Trial/MultiTrialCreate.t @@ -7,6 +7,7 @@ use Data::Dumper; my $fix = SGN::Test::Fixture->new(); + is(ref($fix->config()), "HASH", 'hashref check'); BEGIN {use_ok('CXGN::Trial::TrialCreate');} @@ -120,4 +121,7 @@ foreach my $acc (@$accession_names) { } +$fix->clean_up_db(); + + done_testing(); diff --git a/t/unit_fixture/CXGN/Uploading/KASPGenotypes.t b/t/unit_fixture/CXGN/Uploading/KASPGenotypes.t index a8356a9621..6fc3cd5de1 100644 --- a/t/unit_fixture/CXGN/Uploading/KASPGenotypes.t +++ b/t/unit_fixture/CXGN/Uploading/KASPGenotypes.t @@ -342,5 +342,10 @@ $mech->get_ok('http://localhost:3010/ajax/breeders/trial/'.$genotyping_project_i $response = decode_json $mech->content; is($response->{'success'}, '1'); +$f->clean_up_db(); + +my $bs = CXGN::BreederSearch->new( { dbh=> $f->dbh() }); + +my $refresh = $bs->refresh_matviews($f->config->{dbhost}, $f->config->{dbname}, $f->config->{dbuser}, $f->config->{dbpass}, 'phenotypes', 'concurrent', $f->config->{basepath}); done_testing(); diff --git a/t/unit_fixture/CXGN/Uploading/LocationUpload.t b/t/unit_fixture/CXGN/Uploading/LocationUpload.t index 6d4f8c48ee..7e2b0ce8c0 100644 --- a/t/unit_fixture/CXGN/Uploading/LocationUpload.t +++ b/t/unit_fixture/CXGN/Uploading/LocationUpload.t @@ -112,6 +112,9 @@ for my $extension ("xls", "xlsx") { print STDERR "Locationprop: " . $post1_locationprop_diff . 
"\n"; ok($post1_locationprop_diff == 4, "check locationprop table after upload excel location"); - $f->clean_up_db(); + $f->clean_up_db(); } + +$f->clean_up_db(); + done_testing(); diff --git a/t/unit_fixture/CXGN/Uploading/Phenotype.t b/t/unit_fixture/CXGN/Uploading/Phenotype.t index ec6ae087ce..c1695ea167 100644 --- a/t/unit_fixture/CXGN/Uploading/Phenotype.t +++ b/t/unit_fixture/CXGN/Uploading/Phenotype.t @@ -108,9 +108,12 @@ for my $extension ("xls", "xlsx") { my $parsed_file = $parser->parse('phenotype spreadsheet', $archived_filename_with_path, 1, 'plots', $f->bcs_schema); ok($parsed_file, "Check if parse parse phenotype spreadsheet works"); - print STDERR Dumper $parsed_file; + print STDERR "PARSED FILE FOR $archived_filename_with_path".Dumper($parsed_file); - is_deeply($parsed_file, {'variables' => ['dry matter content|CO_334:0000092','fresh root weight|CO_334:0000012','fresh shoot weight|CO_334:0000016','harvest index|CO_334:0000015','notes'],'data' => {'test_trial23' => {'fresh shoot weight|CO_334:0000016' => ['22','2016-02-11 01:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 01:12:20-0500'],'harvest index|CO_334:0000015' => ['2.8','2016-03-16 01:12:20-0500'],'dry matter content|CO_334:0000092' => ['38','2016-04-27 01:12:20-0500']},'test_trial25' => {'fresh root weight|CO_334:0000012' => ['15','2016-01-15 09:12:20-0500'],'harvest index|CO_334:0000015' => ['4.8','2016-03-16 09:12:20-0500'],'dry matter content|CO_334:0000092' => ['35','2016-04-27 09:12:20-0500'],'fresh shoot weight|CO_334:0000016' => ['24','2016-02-11 09:12:20-0500']},'test_trial24' => {'fresh shoot weight|CO_334:0000016' => ['23','2016-02-11 11:12:20-0500'],'harvest index|CO_334:0000015' => ['3.8','2016-03-16 11:12:20-0500'],'dry matter content|CO_334:0000092' => ['39','2016-04-27 11:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 11:12:20-0500']},'test_trial22' => {'fresh root weight|CO_334:0000012' => ['15','2016-01-15 02:12:20-0500'],'notes' => 
['testnote2',''],'harvest index|CO_334:0000015' => ['1.8','2016-03-16 02:12:20-0500'],'dry matter content|CO_334:0000092' => ['30','2016-04-27 02:12:20-0500'],'fresh shoot weight|CO_334:0000016' => ['21','2016-02-11 02:12:20-0500']},'test_trial210' => {'harvest index|CO_334:0000015' => ['9.8','2016-03-16 15:12:20-0500'],'dry matter content|CO_334:0000092' => ['30','2016-04-27 15:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 15:12:20-0500'],'fresh shoot weight|CO_334:0000016' => ['29','2016-02-11 15:12:20-0500']},'test_trial26' => {'fresh shoot weight|CO_334:0000016' => ['25','2016-02-11 16:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 16:12:20-0500'],'harvest index|CO_334:0000015' => ['5.8','2016-03-16 16:12:20-0500'],'dry matter content|CO_334:0000092' => ['30','2016-04-27 16:12:20-0500']},'test_trial211' => {'fresh shoot weight|CO_334:0000016' => ['30','2016-02-11 03:12:20-0500'],'harvest index|CO_334:0000015' => ['10.8','2016-03-16 03:12:20-0500'],'dry matter content|CO_334:0000092' => ['38','2016-04-27 03:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 03:12:20-0500']},'test_trial29' => {'fresh root weight|CO_334:0000012' => ['15','2016-01-15 14:12:20-0500'],'harvest index|CO_334:0000015' => ['8.8','2016-03-16 14:12:20-0500'],'dry matter content|CO_334:0000092' => ['35','2016-04-27 14:12:20-0500'],'fresh shoot weight|CO_334:0000016' => ['28','2016-02-11 14:12:20-0500']},'test_trial28' => {'fresh shoot weight|CO_334:0000016' => ['27','2016-02-11 13:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 13:12:20-0500'],'harvest index|CO_334:0000015' => ['7.8','2016-03-16 13:12:20-0500'],'dry matter content|CO_334:0000092' => ['39','2016-04-27 13:12:20-0500']},'test_trial27' => {'fresh shoot weight|CO_334:0000016' => ['26','2016-02-11 17:12:20-0500'],'harvest index|CO_334:0000015' => ['6.8','2016-03-16 17:12:20-0500'],'dry matter content|CO_334:0000092' => ['38','2016-04-27 
17:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 17:12:20-0500']},'test_trial21' => {'notes' => ['test note1',''],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 12:12:20-0500'],'dry matter content|CO_334:0000092' => ['35','2016-04-27 12:12:20-0500'],'harvest index|CO_334:0000015' => ['0.8','2016-03-16 12:12:20-0500'],'fresh shoot weight|CO_334:0000016' => ['20','2016-02-11 12:12:20-0500']},'test_trial212' => {'fresh shoot weight|CO_334:0000016' => ['31','2016-02-11 21:12:20-0500'],'harvest index|CO_334:0000015' => ['11.8','2016-03-16 21:12:20-0500'],'dry matter content|CO_334:0000092' => ['39','2016-04-27 21:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 21:12:20-0500']},'test_trial215' => {'fresh shoot weight|CO_334:0000016' => ['34','2016-02-11 19:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 19:12:20-0500'],'dry matter content|CO_334:0000092' => ['38','2016-04-27 19:12:20-0500'],'harvest index|CO_334:0000015' => ['14.8','2016-03-16 19:12:20-0500']},'test_trial214' => {'fresh shoot weight|CO_334:0000016' => ['33','2016-02-11 23:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 23:12:20-0500'],'dry matter content|CO_334:0000092' => ['30','2016-04-27 23:12:20-0500'],'harvest index|CO_334:0000015' => ['13.8','2016-03-16 23:12:20-0500']},'test_trial213' => {'harvest index|CO_334:0000015' => ['12.8','2016-03-16 22:12:20-0500'],'dry matter content|CO_334:0000092' => ['35','2016-04-27 22:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 22:12:20-0500'],'fresh shoot weight|CO_334:0000016' => ['32','2016-02-11 22:12:20-0500']}},'units' => ['test_trial21','test_trial210','test_trial211','test_trial212','test_trial213','test_trial214','test_trial215','test_trial22','test_trial23','test_trial24','test_trial25','test_trial26','test_trial27','test_trial28','test_trial29']}, "Check parse phenotyping spreadsheet" ); + is_deeply($parsed_file, + {'variables' => ['dry 
matter content|CO_334:0000092','fresh root weight|CO_334:0000012','fresh shoot weight|CO_334:0000016','harvest index|CO_334:0000015','notes'],'data' => {'test_trial24' => {'harvest index|CO_334:0000015' => [['3.8','2016-03-16 11:12:20-0500']],'fresh root weight|CO_334:0000012' => [['15','2016-01-15 11:12:20-0500']],'dry matter content|CO_334:0000092' => [['39','2016-04-27 11:12:20-0500']],'fresh shoot weight|CO_334:0000016' => [['23','2016-02-11 11:12:20-0500']]},'test_trial25' => {'dry matter content|CO_334:0000092' => [['35','2016-04-27 09:12:20-0500']],'fresh shoot weight|CO_334:0000016' => [['24','2016-02-11 09:12:20-0500']],'fresh root weight|CO_334:0000012' => [['15','2016-01-15 09:12:20-0500']],'harvest index|CO_334:0000015' => [['4.8','2016-03-16 09:12:20-0500']]},'test_trial26' => {'fresh root weight|CO_334:0000012' => [['15','2016-01-15 16:12:20-0500']],'fresh shoot weight|CO_334:0000016' => [['25','2016-02-11 16:12:20-0500']],'dry matter content|CO_334:0000092' => [['30','2016-04-27 16:12:20-0500']],'harvest index|CO_334:0000015' => [['5.8','2016-03-16 16:12:20-0500']]},'test_trial29' => {'harvest index|CO_334:0000015' => [['8.8','2016-03-16 14:12:20-0500']],'fresh root weight|CO_334:0000012' => [['15','2016-01-15 14:12:20-0500']],'dry matter content|CO_334:0000092' => [['35','2016-04-27 14:12:20-0500']],'fresh shoot weight|CO_334:0000016' => [['28','2016-02-11 14:12:20-0500']]},'test_trial213' => {'fresh root weight|CO_334:0000012' => [['15','2016-01-15 22:12:20-0500']],'fresh shoot weight|CO_334:0000016' => [['32','2016-02-11 22:12:20-0500']],'dry matter content|CO_334:0000092' => [['35','2016-04-27 22:12:20-0500']],'harvest index|CO_334:0000015' => [['12.8','2016-03-16 22:12:20-0500']]},'test_trial28' => {'dry matter content|CO_334:0000092' => [['39','2016-04-27 13:12:20-0500']],'fresh shoot weight|CO_334:0000016' => [['27','2016-02-11 13:12:20-0500']],'fresh root weight|CO_334:0000012' => [['15','2016-01-15 13:12:20-0500']],'harvest 
index|CO_334:0000015' => [['7.8','2016-03-16 13:12:20-0500']]},'test_trial214' => {'harvest index|CO_334:0000015' => [['13.8','2016-03-16 23:12:20-0500']],'dry matter content|CO_334:0000092' => [['30','2016-04-27 23:12:20-0500']],'fresh shoot weight|CO_334:0000016' => [['33','2016-02-11 23:12:20-0500']],'fresh root weight|CO_334:0000012' => [['15','2016-01-15 23:12:20-0500']]},'test_trial210' => {'fresh root weight|CO_334:0000012' => [['15','2016-01-15 15:12:20-0500']],'dry matter content|CO_334:0000092' => [['30','2016-04-27 15:12:20-0500']],'fresh shoot weight|CO_334:0000016' => [['29','2016-02-11 15:12:20-0500']],'harvest index|CO_334:0000015' => [['9.8','2016-03-16 15:12:20-0500']]},'test_trial22' => {'harvest index|CO_334:0000015' => [['1.8','2016-03-16 02:12:20-0500']],'fresh root weight|CO_334:0000012' => [['15','2016-01-15 02:12:20-0500']],'notes' => [['testnote2','']],'fresh shoot weight|CO_334:0000016' => [['21','2016-02-11 02:12:20-0500']],'dry matter content|CO_334:0000092' => [['30','2016-04-27 02:12:20-0500']]},'test_trial212' => {'fresh root weight|CO_334:0000012' => [['15','2016-01-15 21:12:20-0500']],'fresh shoot weight|CO_334:0000016' => [['31','2016-02-11 21:12:20-0500']],'dry matter content|CO_334:0000092' => [['39','2016-04-27 21:12:20-0500']],'harvest index|CO_334:0000015' => [['11.8','2016-03-16 21:12:20-0500']]},'test_trial21' => {'harvest index|CO_334:0000015' => [['0.8','2016-03-16 12:12:20-0500']],'dry matter content|CO_334:0000092' => [['35','2016-04-27 12:12:20-0500']],'fresh shoot weight|CO_334:0000016' => [['20','2016-02-11 12:12:20-0500']],'notes' => [['test note1','']],'fresh root weight|CO_334:0000012' => [['15','2016-01-15 12:12:20-0500']]},'test_trial23' => {'harvest index|CO_334:0000015' => [['2.8','2016-03-16 01:12:20-0500']],'fresh root weight|CO_334:0000012' => [['15','2016-01-15 01:12:20-0500']],'dry matter content|CO_334:0000092' => [['38','2016-04-27 01:12:20-0500']],'fresh shoot weight|CO_334:0000016' => 
[['22','2016-02-11 01:12:20-0500']]},'test_trial211' => {'fresh shoot weight|CO_334:0000016' => [['30','2016-02-11 03:12:20-0500']],'dry matter content|CO_334:0000092' => [['38','2016-04-27 03:12:20-0500']],'fresh root weight|CO_334:0000012' => [['15','2016-01-15 03:12:20-0500']],'harvest index|CO_334:0000015' => [['10.8','2016-03-16 03:12:20-0500']]},'test_trial27' => {'fresh root weight|CO_334:0000012' => [['15','2016-01-15 17:12:20-0500']],'fresh shoot weight|CO_334:0000016' => [['26','2016-02-11 17:12:20-0500']],'dry matter content|CO_334:0000092' => [['38','2016-04-27 17:12:20-0500']],'harvest index|CO_334:0000015' => [['6.8','2016-03-16 17:12:20-0500']]},'test_trial215' => {'fresh shoot weight|CO_334:0000016' => [['34','2016-02-11 19:12:20-0500']],'dry matter content|CO_334:0000092' => [['38','2016-04-27 19:12:20-0500']],'fresh root weight|CO_334:0000012' => [['15','2016-01-15 19:12:20-0500']],'harvest index|CO_334:0000015' => [['14.8','2016-03-16 19:12:20-0500']]}},'units' => ['test_trial21','test_trial210','test_trial211','test_trial212','test_trial213','test_trial214','test_trial215','test_trial22','test_trial23','test_trial24','test_trial25','test_trial26','test_trial27','test_trial28','test_trial29']}, "Check parse phentoyping spreadsheet 1 ($extension)" ); + + #{'variables' => ['dry matter content|CO_334:0000092','fresh root weight|CO_334:0000012','fresh shoot weight|CO_334:0000016','harvest index|CO_334:0000015','notes'],'data' => {'test_trial23' => {'fresh shoot weight|CO_334:0000016' => ['22','2016-02-11 01:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 01:12:20-0500'],'harvest index|CO_334:0000015' => ['2.8','2016-03-16 01:12:20-0500'],'dry matter content|CO_334:0000092' => ['38','2016-04-27 01:12:20-0500']},'test_trial25' => {'fresh root weight|CO_334:0000012' => ['15','2016-01-15 09:12:20-0500'],'harvest index|CO_334:0000015' => ['4.8','2016-03-16 09:12:20-0500'],'dry matter content|CO_334:0000092' => ['35','2016-04-27 
09:12:20-0500'],'fresh shoot weight|CO_334:0000016' => ['24','2016-02-11 09:12:20-0500']},'test_trial24' => {'fresh shoot weight|CO_334:0000016' => ['23','2016-02-11 11:12:20-0500'],'harvest index|CO_334:0000015' => ['3.8','2016-03-16 11:12:20-0500'],'dry matter content|CO_334:0000092' => ['39','2016-04-27 11:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 11:12:20-0500']},'test_trial22' => {'fresh root weight|CO_334:0000012' => ['15','2016-01-15 02:12:20-0500'],'notes' => ['testnote2',''],'harvest index|CO_334:0000015' => ['1.8','2016-03-16 02:12:20-0500'],'dry matter content|CO_334:0000092' => ['30','2016-04-27 02:12:20-0500'],'fresh shoot weight|CO_334:0000016' => ['21','2016-02-11 02:12:20-0500']},'test_trial210' => {'harvest index|CO_334:0000015' => ['9.8','2016-03-16 15:12:20-0500'],'dry matter content|CO_334:0000092' => ['30','2016-04-27 15:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 15:12:20-0500'],'fresh shoot weight|CO_334:0000016' => ['29','2016-02-11 15:12:20-0500']},'test_trial26' => {'fresh shoot weight|CO_334:0000016' => ['25','2016-02-11 16:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 16:12:20-0500'],'harvest index|CO_334:0000015' => ['5.8','2016-03-16 16:12:20-0500'],'dry matter content|CO_334:0000092' => ['30','2016-04-27 16:12:20-0500']},'test_trial211' => {'fresh shoot weight|CO_334:0000016' => ['30','2016-02-11 03:12:20-0500'],'harvest index|CO_334:0000015' => ['10.8','2016-03-16 03:12:20-0500'],'dry matter content|CO_334:0000092' => ['38','2016-04-27 03:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 03:12:20-0500']},'test_trial29' => {'fresh root weight|CO_334:0000012' => ['15','2016-01-15 14:12:20-0500'],'harvest index|CO_334:0000015' => ['8.8','2016-03-16 14:12:20-0500'],'dry matter content|CO_334:0000092' => ['35','2016-04-27 14:12:20-0500'],'fresh shoot weight|CO_334:0000016' => ['28','2016-02-11 14:12:20-0500']},'test_trial28' => {'fresh shoot 
weight|CO_334:0000016' => ['27','2016-02-11 13:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 13:12:20-0500'],'harvest index|CO_334:0000015' => ['7.8','2016-03-16 13:12:20-0500'],'dry matter content|CO_334:0000092' => ['39','2016-04-27 13:12:20-0500']},'test_trial27' => {'fresh shoot weight|CO_334:0000016' => ['26','2016-02-11 17:12:20-0500'],'harvest index|CO_334:0000015' => ['6.8','2016-03-16 17:12:20-0500'],'dry matter content|CO_334:0000092' => ['38','2016-04-27 17:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 17:12:20-0500']},'test_trial21' => {'notes' => ['test note1',''],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 12:12:20-0500'],'dry matter content|CO_334:0000092' => ['35','2016-04-27 12:12:20-0500'],'harvest index|CO_334:0000015' => ['0.8','2016-03-16 12:12:20-0500'],'fresh shoot weight|CO_334:0000016' => ['20','2016-02-11 12:12:20-0500']},'test_trial212' => {'fresh shoot weight|CO_334:0000016' => ['31','2016-02-11 21:12:20-0500'],'harvest index|CO_334:0000015' => ['11.8','2016-03-16 21:12:20-0500'],'dry matter content|CO_334:0000092' => ['39','2016-04-27 21:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 21:12:20-0500']},'test_trial215' => {'fresh shoot weight|CO_334:0000016' => ['34','2016-02-11 19:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 19:12:20-0500'],'dry matter content|CO_334:0000092' => ['38','2016-04-27 19:12:20-0500'],'harvest index|CO_334:0000015' => ['14.8','2016-03-16 19:12:20-0500']},'test_trial214' => {'fresh shoot weight|CO_334:0000016' => ['33','2016-02-11 23:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 23:12:20-0500'],'dry matter content|CO_334:0000092' => ['30','2016-04-27 23:12:20-0500'],'harvest index|CO_334:0000015' => ['13.8','2016-03-16 23:12:20-0500']},'test_trial213' => {'harvest index|CO_334:0000015' => ['12.8','2016-03-16 22:12:20-0500'],'dry matter content|CO_334:0000092' => ['35','2016-04-27 
22:12:20-0500'],'fresh root weight|CO_334:0000012' => ['15','2016-01-15 22:12:20-0500'],'fresh shoot weight|CO_334:0000016' => ['32','2016-02-11 22:12:20-0500']}},'units' => ['test_trial21','test_trial210','test_trial211','test_trial212','test_trial213','test_trial214','test_trial215','test_trial22','test_trial23','test_trial24','test_trial25','test_trial26','test_trial27','test_trial28','test_trial29']}, "Check parse phenotyping spreadsheet 1 ($extension)" ); my %phenotype_metadata; @@ -144,8 +147,9 @@ for my $extension ("xls", "xlsx") { my ($verified_warning, $verified_error) = $store_phenotypes->verify(); ok(!$verified_error); my ($stored_phenotype_error_msg, $store_success) = $store_phenotypes->store(); - ok(!$stored_phenotype_error_msg, "check that store pheno spreadsheet works"); + ok(!$stored_phenotype_error_msg, "check that store pheno spreadsheet works 1"); + print STDERR "STORED PHENOTYPE ERROR MSG 1: $stored_phenotype_error_msg\n"; my $tn = CXGN::Trial->new( { bcs_schema => $f->bcs_schema(), trial_id => 137 }); @@ -276,7 +280,8 @@ for my $extension ("xls", "xlsx") { #print STDERR Dumper $verified_error; ok(!$verified_error); my ($stored_phenotype_error_msg, $store_success) = $store_phenotypes->store(); - ok(!$stored_phenotype_error_msg, "check that store pheno spreadsheet works"); + ok(!$stored_phenotype_error_msg, "check that store pheno spreadsheet works 2"); + print STDERR "ERROR MESSAGE FROM STORE: $stored_phenotype_error_msg\n"; my $refresh = $bs->refresh_matviews($f->config->{dbhost}, $f->config->{dbname}, $f->config->{dbuser}, $f->config->{dbpass}, 'phenotypes', 'concurrent', $f->config->{basepath}); @@ -316,11 +321,12 @@ for my $extension ("xls", "xlsx") { $validate_file = $parser->validate('phenotype spreadsheet simple generic', $archived_filename_with_path, 0, 'plots', $f->bcs_schema); ok($validate_file == 1, "Check if parse validate works for phenotype file"); - my $parsed_file = $parser->parse('phenotype spreadsheet simple generic', 
$archived_filename_with_path, 0, 'plots', $f->bcs_schema); - ok($parsed_file, "Check if parse parse phenotype spreadsheet works"); - is_deeply($parsed_file, {'data' => {'test_trial28' => {'dry matter content|CO_334:0000092' => ['139','']},'test_trial210' => {'dry matter content|CO_334:0000092' => ['130','']},'test_trial27' => {'dry matter content|CO_334:0000092' => ['138','']},'test_trial211' => {'dry matter content|CO_334:0000092' => ['138','']},'test_trial21' => {'dry matter content|CO_334:0000092' => ['135','']},'test_trial26' => {'dry matter content|CO_334:0000092' => ['','']},'test_trial214' => {'dry matter content|CO_334:0000092' => ['130','']},'test_trial213' => {'dry matter content|CO_334:0000092' => ['','']},'test_trial29' => {'dry matter content|CO_334:0000092' => ['135','']},'test_trial23' => {'dry matter content|CO_334:0000092' => ['','']},'test_trial24' => {'dry matter content|CO_334:0000092' => ['','']},'test_trial212' => {'dry matter content|CO_334:0000092' => ['139','']},'test_trial22' => {'dry matter content|CO_334:0000092' => ['130','']},'test_trial25' => {'dry matter content|CO_334:0000092' => ['135','']},'test_trial215' => {'dry matter content|CO_334:0000092' => ['138','']}},'units' => ['test_trial21','test_trial210','test_trial211','test_trial212','test_trial213','test_trial214','test_trial215','test_trial22','test_trial23','test_trial24','test_trial25','test_trial26','test_trial27','test_trial28','test_trial29'],'variables' => ['dry matter content|CO_334:0000092']}, "Check parse phenotyping spreadsheet" ); + my $parsed_file = $parser->parse('phenotype spreadsheet simple', $archived_filename_with_path, 0, 'plots', $f->bcs_schema); + ok($parsed_file, "Check if parse parse phenotype spreadsheet works ($extension)"); + is_deeply($parsed_file, + {'units' => 
['test_trial21','test_trial210','test_trial211','test_trial212','test_trial213','test_trial214','test_trial215','test_trial22','test_trial23','test_trial24','test_trial25','test_trial26','test_trial27','test_trial28','test_trial29'],'data' => {'test_trial213' => {'dry matter content|CO_334:0000092' => [['','']]},'test_trial212' => {'dry matter content|CO_334:0000092' => [['139','']]},'test_trial21' => {'dry matter content|CO_334:0000092' => [['135','']]},'test_trial26' => {'dry matter content|CO_334:0000092' => [['','']]},'test_trial25' => {'dry matter content|CO_334:0000092' => [['135','']]},'test_trial210' => {'dry matter content|CO_334:0000092' => [['130','']]},'test_trial24' => {'dry matter content|CO_334:0000092' => [['','']]},'test_trial27' => {'dry matter content|CO_334:0000092' => [['138','']]},'test_trial23' => {'dry matter content|CO_334:0000092' => [['','']]},'test_trial211' => {'dry matter content|CO_334:0000092' => [['138','']]},'test_trial215' => {'dry matter content|CO_334:0000092' => [['138','']]},'test_trial22' => {'dry matter content|CO_334:0000092' => [['130','']]},'test_trial28' => {'dry matter content|CO_334:0000092' => [['139','']]},'test_trial29' => {'dry matter content|CO_334:0000092' => [['135','']]},'test_trial214' => {'dry matter content|CO_334:0000092' => [['130','']]}},'variables' => ['dry matter content|CO_334:0000092']}, "check parse phenotype spreadsheet 1 ($extension)"); my %phenotype_metadata; $phenotype_metadata{'archived_file'} = $archived_filename_with_path; @@ -354,8 +360,10 @@ for my $extension ("xls", "xlsx") { my ($verified_warning, $verified_error) = $store_phenotypes->verify(); ok(!$verified_error); my ($stored_phenotype_error_msg, $store_success) = $store_phenotypes->store(); - ok(!$stored_phenotype_error_msg, "check that store pheno spreadsheet works"); + ok(!$stored_phenotype_error_msg, "check that store pheno spreadsheet works 3"); + print STDERR "STORED PHENOTYPE ERROR MESG 3: $stored_phenotype_error_msg\n"; + my $tn 
= CXGN::Trial->new( { bcs_schema => $f->bcs_schema(), trial_id => 137 }); @@ -395,9 +403,11 @@ for my $extension ("xls", "xlsx") { $f->dbh()->rollback(); - $f->clean_up_db(); + } +$f->clean_up_db(); + done_testing(); #exit(0); diff --git a/t/unit_fixture/CXGN/Uploading/Phenotype_with_multi_categories.t b/t/unit_fixture/CXGN/Uploading/Phenotype_with_multi_categories.t index 3ff03c488c..2c6a7f3076 100644 --- a/t/unit_fixture/CXGN/Uploading/Phenotype_with_multi_categories.t +++ b/t/unit_fixture/CXGN/Uploading/Phenotype_with_multi_categories.t @@ -65,7 +65,7 @@ my $uploader = CXGN::UploadFile->new({ tempfile => $filename, subdirectory => 'temp_fieldbook', archive_path => '/tmp', - archive_filename => "upload_phenotypin_spreadsheet_multicategories.$extension", + archive_filename => "upload_phenotypin_spreadsheet_multicategories_with_errors.$extension", timestamp => $timestamp, user_id => 41, # janedoe in fixture user_role => 'curator' @@ -122,10 +122,10 @@ my $store_phenotypes = CXGN::Phenotypes::StorePhenotypes->new( composable_validation_check_name=>$f->config->{composable_validation_check_name} ); my ($verified_warning, $verified_error) = $store_phenotypes->verify(); -my $expected_error = 'This trait value should be one of 1/2/3/4/5:
    Plot Name: KASESE_TP2013_669
    Trait Name: CO_334:0000191
    Value: a:b

    '; +my $expected_error = ' This trait value should be one of 1/2/3/4/5:
    Plot Name: KASESE_TP2013_668
    Trait Name: CO_334:0000191
    Value: 2:b

    This trait value should be one of 1/2/3/4/5:
    Plot Name: KASESE_TP2013_669
    Trait Name: CO_334:0000191
    Value: a:b

    '; print STDERR "ERRORS DETECTED: ".Dumper($verified_error); -is($verified_error, $expected_error, "check error from store"); +like($verified_error, qr/This trait value should be one of /, "check error from store"); # do not try to store the previous data, is it is erroneous... instead load new file without errors @@ -133,7 +133,7 @@ is($verified_error, $expected_error, "check error from store"); # Upload file without errors to store -$filename = "t/data/trial/upload_phenotypin_spreadsheet_multicategories_with_errors.$extension"; +$filename = "t/data/trial/upload_phenotypin_spreadsheet_multicategories.$extension"; $time = DateTime->now(); $timestamp = $time->ymd()."_".$time->hms(); @@ -214,4 +214,3 @@ $f->clean_up_db(); done_testing(); - diff --git a/t/unit_fixture/CXGN/Uploading/Storebrapiobservations.t b/t/unit_fixture/CXGN/Uploading/Storebrapiobservations.t index 07e89cbfcd..356aecd3b3 100644 --- a/t/unit_fixture/CXGN/Uploading/Storebrapiobservations.t +++ b/t/unit_fixture/CXGN/Uploading/Storebrapiobservations.t @@ -121,11 +121,18 @@ foreach (@{$response->{result}->{observations}}){ delete $_->{observationDbId}; } +# the problem here is that the hash is in random order, so to check +# we need to sort the observations for something more stable +# +my @sorted_observations = sort { $a->{germplasmName} cmp $b->{germplasmName} } @{ $response->{result}->{observations} }; + +$response->{result}->{observations} = \@sorted_observations; + is_deeply($response, { 'result' => { - 'observations' => [ - { - 'observationLevel' => 'plot', + 'observations' => [ + { + 'observationLevel' => 'plot', 'observationTimeStamp' => '2015-06-16T00:53:26Z', 'germplasmName' => 'test_accession3', 'observationUnitName' => 'test_trial210', @@ -140,6 +147,7 @@ is_deeply($response, { 'externalReferences' => undef, 'additionalInfo' => undef, }, + { 'observationLevel' => 'plot', 'observationTimeStamp' => '2015-06-16T00:53:26Z', @@ -155,7 +163,8 @@ is_deeply($response, { 'studyDbId' => 137, 
'externalReferences' => undef, 'additionalInfo' => undef, - } + }, + ] }, 'metadata' => { diff --git a/t/unit_mech/AJAX/_BrAPIv2_phenotyping.t b/t/unit_mech/AJAX/_BrAPIv2_phenotyping.t index a32e61a00f..0789de2986 100644 --- a/t/unit_mech/AJAX/_BrAPIv2_phenotyping.t +++ b/t/unit_mech/AJAX/_BrAPIv2_phenotyping.t @@ -124,15 +124,245 @@ is_deeply($response, $expected, "GET observations pageSize 2 test"); $mech->get_ok('http://localhost:3010/brapi/v2/observations/740338'); $response = decode_json $mech->content; -#print STDERR "\n\n" . Dumper$response; +print STDERR "\n\n" . Dumper$response; #16 is_deeply($response, {'metadata' => {'datafiles' => [],'status' => [{'messageType' => 'INFO','message' => 'BrAPI base call found with page=0, pageSize=10'},{'messageType' => 'INFO','message' => 'Loading CXGN::BrAPI::v2::Observations'},{'messageType' => 'INFO','message' => 'Observations result constructed'}],'pagination' => {'totalCount' => 1,'totalPages' => 1,'currentPage' => 0,'pageSize' => 10}},'result' => {'externalReferences' => undef,'value' => '655.92','germplasmDbId' => '41283','season' => {'seasonDbId' => '2017','season' => '2017','year' => '2017'},'studyDbId' => '165','observationVariableName' => 'cass sink leaf|ADP|ug/g|week 16|COMP:0000010','observationVariableDbId' => '77556','observationUnitDbId' => '41284','germplasmName' => 'IITA-TMS-IBA980581','observationTimeStamp' => undef,'uploadedBy' => undef,'collector' => undef,'observationUnitName' => 'CASS_6Genotypes_103','observationDbId' => '740338','additionalInfo' => undef}}, "GET observations test"); $mech->get_ok('http://localhost:3010/brapi/v2/observations/table?pageSize=2'); $response = decode_json $mech->content; -print STDERR "\n\n" . Dumper$response; + print STDERR "\n reponse is here:" . 
Dumper $response; #17 -is_deeply($response, {'result' => {'observationVariables' => [{'observationVariableDbId' => '77559','observationVariableName' => 'cass sink leaf|3-phosphoglyceric acid|ug/g|week 16|COMP:0000013'},{'observationVariableName' => 'cass sink leaf|ADP alpha-D-glucoside|ug/g|week 16|COMP:0000011','observationVariableDbId' => '77557'},{'observationVariableDbId' => '77556','observationVariableName' => 'cass sink leaf|ADP|ug/g|week 16|COMP:0000010'},{'observationVariableName' => 'cass source leaf|3-phosphoglyceric acid|ug/g|week 16|COMP:0000002','observationVariableDbId' => '77548'},{'observationVariableName' => 'cass source leaf|ADP alpha-D-glucoside|ug/g|week 16|COMP:0000007','observationVariableDbId' => '77553'},{'observationVariableDbId' => '77549','observationVariableName' => 'cass source leaf|ADP|ug/g|week 16|COMP:0000003'},{'observationVariableName' => 'cass storage root|3-phosphoglyceric acid|ug/g|week 16|COMP:0000006','observationVariableDbId' => '77552'},{'observationVariableDbId' => '77550','observationVariableName' => 'cass storage root|ADP alpha-D-glucoside|ug/g|week 16|COMP:0000004'},{'observationVariableDbId' => '77551','observationVariableName' => 'cass storage root|ADP|ug/g|week 16|COMP:0000005'},{'observationVariableDbId' => '77558','observationVariableName' => 'cass upper stem|3-phosphoglyceric acid|ug/g|week 16|COMP:0000012'},{'observationVariableDbId' => '77554','observationVariableName' => 'cass upper stem|ADP alpha-D-glucoside|ug/g|week 16|COMP:0000008'},{'observationVariableName' => 'cass upper stem|ADP|ug/g|week 16|COMP:0000009','observationVariableDbId' => '77555'},{'observationVariableName' => 'dry matter content percentage|CO_334:0000092','observationVariableDbId' => '70741'},{'observationVariableName' => 'fresh root weight|CO_334:0000012','observationVariableDbId' => '70666'},{'observationVariableDbId' => '70773','observationVariableName' => 'fresh shoot weight measurement in kg|CO_334:0000016'},{'observationVariableDbId' 
=> '70668','observationVariableName' => 'harvest index variable|CO_334:0000015'}],'data' => [[ '2014', 134, 'test', 'test', 139, 'Kasese solgs trial', 'This trial was loaded into the fixture to test solgs.', 'Alpha', undef, undef, undef, undef, undef, undef, undef, '23', 'test_location', 39086, 'UG120250', '', 'plot', 39691, 'KASESE_TP2013_1000', '1', '53', '36014', undef, undef, 'test', undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef ], [ '2014', 134, 'test', 'test', 139, 'Kasese solgs trial', 'This trial was loaded into the fixture to test solgs.', 'Alpha', undef, undef, undef, undef, undef, undef, undef, '23', 'test_location', 38960, 'UG120092', '', 'plot', 39493, 'KASESE_TP2013_1001', '1', '53', '36015', undef, undef, 'test', undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef ]],'headerRow' => ['studyYear','programDbId','programName','programDescription','studyDbId','studyName','studyDescription','studyDesign','plotWidth','plotLength','fieldSize','fieldTrialIsPlannedToBeGenotyped','fieldTrialIsPlannedToCross','plantingDate','harvestDate','locationDbId','locationName','germplasmDbId','germplasmName','germplasmSynonyms','observationLevel','observationUnitDbId','observationUnitName','replicate','blockNumber','plotNumber','rowNumber','colNumber','entryType','plantNumber']},'metadata' => {'pagination' => {'currentPage' => 0,'totalPages' => 508,'totalCount' => 1016,'pageSize' => 2},'status' => [{'message' => 'BrAPI base call found with page=0, pageSize=2','messageType' => 'INFO'},{'message' => 'Loading CXGN::BrAPI::v2::ObservationTables','messageType' => 'INFO'},{'message' => 'Observations table result constructed','messageType' => 'INFO'}],'datafiles' => []}}, "GET observations table test"); + +is_deeply($response, + { + 'result' => { + 'data' => [ + [ + '2014', + 134, + 'test', + 'test', + 139, + 'Kasese solgs 
trial', + 'This trial was loaded into the fixture to test solgs.', + 'Alpha', + undef, + undef, + undef, + undef, + undef, + undef, + undef, + '23', + 'test_location', + 39086, + 'UG120250', + '', + 'plot', + 39691, + 'KASESE_TP2013_1000', + '1', + '53', + '36014', + undef, + undef, + 'test', + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef + ], + [ + '2014', + 134, + 'test', + 'test', + 139, + 'Kasese solgs trial', + 'This trial was loaded into the fixture to test solgs.', + 'Alpha', + undef, + undef, + undef, + undef, + undef, + undef, + undef, + '23', + 'test_location', + 38960, + 'UG120092', + '', + 'plot', + 39493, + 'KASESE_TP2013_1001', + '1', + '53', + '36015', + undef, + undef, + 'test', + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef + ] + ], + 'observationVariables' => [ + { + 'observationVariableName' => 'cass sink leaf|3-phosphoglyceric acid|ug/g|week 16|COMP:0000013', + 'observationVariableDbId' => '77559' + }, + { + 'observationVariableDbId' => '77557', + 'observationVariableName' => 'cass sink leaf|ADP alpha-D-glucoside|ug/g|week 16|COMP:0000011' + }, + { + 'observationVariableDbId' => '77556', + 'observationVariableName' => 'cass sink leaf|ADP|ug/g|week 16|COMP:0000010' + }, + { + 'observationVariableDbId' => '77548', + 'observationVariableName' => 'cass source leaf|3-phosphoglyceric acid|ug/g|week 16|COMP:0000002' + }, + { + 'observationVariableDbId' => '77553', + 'observationVariableName' => 'cass source leaf|ADP alpha-D-glucoside|ug/g|week 16|COMP:0000007' + }, + { + 'observationVariableDbId' => '77549', + 'observationVariableName' => 'cass source leaf|ADP|ug/g|week 16|COMP:0000003' + }, + { + 'observationVariableDbId' => '77552', + 'observationVariableName' => 'cass storage root|3-phosphoglyceric acid|ug/g|week 
16|COMP:0000006' + }, + { + 'observationVariableDbId' => '77550', + 'observationVariableName' => 'cass storage root|ADP alpha-D-glucoside|ug/g|week 16|COMP:0000004' + }, + { + 'observationVariableName' => 'cass storage root|ADP|ug/g|week 16|COMP:0000005', + 'observationVariableDbId' => '77551' + }, + { + 'observationVariableDbId' => '77558', + 'observationVariableName' => 'cass upper stem|3-phosphoglyceric acid|ug/g|week 16|COMP:0000012' + }, + { + 'observationVariableDbId' => '77554', + 'observationVariableName' => 'cass upper stem|ADP alpha-D-glucoside|ug/g|week 16|COMP:0000008' + }, + { + 'observationVariableName' => 'cass upper stem|ADP|ug/g|week 16|COMP:0000009', + 'observationVariableDbId' => '77555' + }, + { + 'observationVariableName' => 'dry matter content percentage|CO_334:0000092', + 'observationVariableDbId' => '70741' + }, + { + 'observationVariableDbId' => '70666', + 'observationVariableName' => 'fresh root weight|CO_334:0000012' + }, + { + 'observationVariableName' => 'fresh shoot weight measurement in kg|CO_334:0000016', + 'observationVariableDbId' => '70773' + }, + { + 'observationVariableDbId' => '70668', + 'observationVariableName' => 'harvest index variable|CO_334:0000015' + } + ], + 'headerRow' => [ + 'studyYear', + 'programDbId', + 'programName', + 'programDescription', + 'studyDbId', + 'studyName', + 'studyDescription', + 'studyDesign', + 'plotWidth', + 'plotLength', + 'fieldSize', + 'fieldTrialIsPlannedToBeGenotyped', + 'fieldTrialIsPlannedToCross', + 'plantingDate', + 'harvestDate', + 'locationDbId', + 'locationName', + 'germplasmDbId', + 'germplasmName', + 'germplasmSynonyms', + 'observationLevel', + 'observationUnitDbId', + 'observationUnitName', + 'replicate', + 'blockNumber', + 'plotNumber', + 'rowNumber', + 'colNumber', + 'entryType', + 'plantNumber' + ] + }, + 'metadata' => { + 'datafiles' => [], + 'status' => [ + { + 'messageType' => 'INFO', + 'message' => 'BrAPI base call found with page=0, pageSize=2' + }, + { + 'message' => 
'Loading CXGN::BrAPI::v2::ObservationTables', + 'messageType' => 'INFO' + }, + { + 'message' => 'Observations table result constructed', + 'messageType' => 'INFO' + } + ], + 'pagination' => { + 'totalPages' => 508, + 'currentPage' => 0, + 'pageSize' => 2, + 'totalCount' => 1016 + } + } + }, "table test"); + + +# is_deeply($response, {'result' => {'observationVariables' => [{'observationVariableDbId' => '77559','observationVariableName' => 'cass sink leaf|3-phosphoglyceric acid|ug/g|week 16|COMP:0000013'},{'observationVariableName' => 'cass sink leaf|ADP alpha-D-glucoside|ug/g|week 16|COMP:0000011','observationVariableDbId' => '77557'},{'observationVariableDbId' => '77556','observationVariableName' => 'cass sink leaf|ADP|ug/g|week 16|COMP:0000010'},{'observationVariableName' => 'cass source leaf|3-phosphoglyceric acid|ug/g|week 16|COMP:0000002','observationVariableDbId' => '77548'},{'observationVariableName' => 'cass source leaf|ADP alpha-D-glucoside|ug/g|week 16|COMP:0000007','observationVariableDbId' => '77553'},{'observationVariableDbId' => '77549','observationVariableName' => 'cass source leaf|ADP|ug/g|week 16|COMP:0000003'},{'observationVariableName' => 'cass storage root|3-phosphoglyceric acid|ug/g|week 16|COMP:0000006','observationVariableDbId' => '77552'},{'observationVariableDbId' => '77550','observationVariableName' => 'cass storage root|ADP alpha-D-glucoside|ug/g|week 16|COMP:0000004'},{'observationVariableDbId' => '77551','observationVariableName' => 'cass storage root|ADP|ug/g|week 16|COMP:0000005'},{'observationVariableDbId' => '77558','observationVariableName' => 'cass upper stem|3-phosphoglyceric acid|ug/g|week 16|COMP:0000012'},{'observationVariableDbId' => '77554','observationVariableName' => 'cass upper stem|ADP alpha-D-glucoside|ug/g|week 16|COMP:0000008'},{'observationVariableName' => 'cass upper stem|ADP|ug/g|week 16|COMP:0000009','observationVariableDbId' => '77555'},{'observationVariableName' => 'dry matter content 
percentage|CO_334:0000092','observationVariableDbId' => '70741'},{'observationVariableName' => 'fresh root weight|CO_334:0000012','observationVariableDbId' => '70666'},{'observationVariableDbId' => '70773','observationVariableName' => 'fresh shoot weight measurement in kg|CO_334:0000016'},{'observationVariableDbId' => '70668','observationVariableName' => 'harvest index variable|CO_334:0000015'}],'data' => [[ '2014', 134, 'test', 'test', 139, 'Kasese solgs trial', 'This trial was loaded into the fixture to test solgs.', 'Alpha', undef, undef, undef, undef, undef, undef, undef, '23', 'test_location', 39086, 'UG120250', '', 'plot', 39691, 'KASESE_TP2013_1000', '1', '53', '36014', undef, undef, 'test', undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef ], [ '2014', 134, 'test', 'test', 139, 'Kasese solgs trial', 'This trial was loaded into the fixture to test solgs.', 'Alpha', undef, undef, undef, undef, undef, undef, undef, '23', 'test_location', 38960, 'UG120092', '', 'plot', 39493, 'KASESE_TP2013_1001', '1', '53', '36015', undef, undef, 'test', undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef ]],'headerRow' => ['studyYear','programDbId','programName','programDescription','studyDbId','studyName','studyDescription','studyDesign','plotWidth','plotLength','fieldSize','fieldTrialIsPlannedToBeGenotyped','fieldTrialIsPlannedToCross','plantingDate','harvestDate','locationDbId','locationName','germplasmDbId','germplasmName','germplasmSynonyms','observationLevel','observationUnitDbId','observationUnitName','replicate','blockNumber','plotNumber','rowNumber','colNumber','entryType','plantNumber']},'metadata' => {'pagination' => {'currentPage' => 0,'totalPages' => 508,'totalCount' => 1016,'pageSize' => 2},'status' => [{'message' => 'BrAPI base call found with page=0, pageSize=2','messageType' => 'INFO'},{'message' => 'Loading 
CXGN::BrAPI::v2::ObservationTables','messageType' => 'INFO'},{'message' => 'Observations table result constructed','messageType' => 'INFO'}],'datafiles' => []}}, "GET observations table test"); +#is_deeply ($response, {'result' => { 'headerRow' => ['studyYear','programDbId','programName','programDescription','studyDbId','studyName','studyDescription','studyDesign','plotWidth','plotLength','fieldSize','fieldTrialIsPlannedToBeGenotyped','fieldTrialIsPlannedToCross','plantingDate','harvestDate','locationDbId','locationName','germplasmDbId','germplasmName','germplasmSynonyms','observationLevel','observationUnitDbId','observationUnitName','replicate','blockNumber','plotNumber','rowNumber','colNumber','entryType','plantNumber'], 'data' => [['2014',134,'test','test',139,'Kasese solgs trial','This trial was loaded into the fixture to test solgs.', 'Alpha', undef, undef, undef, undef, undef, undef, undef, '23', 'test_location', 39086, 'UG120250', '','plot', 39691, 'KASESE_TP2013_1000', '1', '53', '36014', undef, undef, 'test', undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef,'0', '0', '0', undef, undef ], ['2014', 134, 'test', 'test', 139, 'Kasese solgs trial', 'This trial was loaded into the fixture to test solgs.', 'Alpha', undef, undef, undef, undef, undef, undef, undef, '23', 'test_location', 38960, 'UG120092', '', 'plot', 39493, 'KASESE_TP2013_1001', '1', '53', '36015', undef, undef, 'test', undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, '0', '0', '0', undef, undef]], 'observationVariables' => [{'observationVariableDbId' => '77559', 'observationVariableName' => 'cass sink leaf|3-phosphoglyceric acid|ug/g|week 16|COMP:0000013'},{'observationVariableDbId' => '77557','observationVariableName' => 'cass sink leaf|ADP alpha-D-glucoside|ug/g|week 16|COMP:0000011'},{'observationVariableName' => 'cass sink leaf|ADP|ug/g|week 16|COMP:0000010','observationVariableDbId' => '77556'},{ 
'observationVariableName' => 'cass source leaf|3-phosphoglyceric acid|ug/g|week 16|COMP:0000002', 'observationVariableDbId' => '77548'},{ 'observationVariableName' => 'cass source leaf|ADP alpha-D-glucoside|ug/g|week 16|COMP:0000007', 'observationVariableDbId' => '77553'},{ 'observationVariableDbId' => '77549', 'observationVariableName' => 'cass source leaf|ADP|ug/g|week 16|COMP:0000003'},{ 'observationVariableDbId' => '77552', 'observationVariableName' => 'cass storage root|3-phosphoglyceric acid|ug/g|week 16|COMP:0000006'},{ 'observationVariableName' => 'cass storage root|ADP alpha-D-glucoside|ug/g|week 16|COMP:0000004', 'observationVariableDbId' => '77550'},{ 'observationVariableDbId' => '77551', 'observationVariableName' => 'cass storage root|ADP|ug/g|week 16|COMP:0000005'},{ 'observationVariableName' => 'cass upper stem|3-phosphoglyceric acid|ug/g|week 16|COMP:0000012', 'observationVariableDbId' => '77558'},{ 'observationVariableDbId' => '77554', 'observationVariableName' => 'cass upper stem|ADP alpha-D-glucoside|ug/g|week 16|COMP:0000008'},{ 'observationVariableName' => 'cass upper stem|ADP|ug/g|week 16|COMP:0000009', 'observationVariableDbId' => '77555'},{ 'observationVariableDbId' => '70741', 'observationVariableName' => 'dry matter content percentage|CO_334:0000092'},{ 'observationVariableDbId' => '70666', 'observationVariableName' => 'fresh root weight|CO_334:0000012'},{ 'observationVariableName' => 'fresh shoot weight measurement in kg|CO_334:0000016', 'observationVariableDbId' => '70773'},{ 'observationVariableName' => 'harvest index variable|CO_334:0000015', 'observationVariableDbId' => '70668'}]}, 'metadata' => {'status' => [ {'message' => 'BrAPI base call found with page=0, pageSize=2', 'messageType' => 'INFO' }, {'message' => 'Loading CXGN::BrAPI::v2::ObservationTables', 'messageType' => 'INFO' }, {'message' => 'Observations table result constructed', 'messageType' => 'INFO'}], 'pagination' => { 'totalCount' => 1016, 'currentPage' => 0, 'pageSize' 
=> 2, 'totalPages' => 508}, 'datafiles' => []}}, "GET OBSERVATION TABLE TEST"); $mech->post_ok('http://localhost:3010/brapi/v2/search/observations', ['pageSize'=>'2', 'observationDbIds' => ['740337']]); $response = decode_json $mech->content; @@ -178,7 +408,7 @@ $resp = $ua->put("http://localhost:3010/brapi/v2/observations/737987", Content = $response = decode_json $resp->{_content}; #print STDERR "\n\n--update" . Dumper$response; -is_deeply($response, { 'result' => {'data' => [ { 'collector' => 'John Doe', 'observationVariableDbId' => '70741', 'germplasmName' => 'UG130133', 'additionalInfo' => undef, 'germplasmDbId' => '39243', 'observationDbId' => '737987', 'observationUnitDbId' => '39548', 'observationUnitName' => 'KASESE_TP2013_1012', 'observationVariableName' => 'dry matter content percentage', 'observationTimeStamp' => '2023-01-01T14:47:23-06:10', 'observationLevel' => 'plot', 'value' => '500', 'uploadedBy' => 'John Doe', 'externalReferences' => [ { 'referenceId' => 'doi:10.155454/200', 'referenceSource' => 'DOI' } ], 'studyDbId' => '139' } ]}, 'metadata' => { 'status' => [ { 'messageType' => 'INFO', 'message' => 'BrAPI base call found with page=0, pageSize=10' }, { 'messageType' => 'INFO', 'message' => 'Loading CXGN::BrAPI::v2::Observations' }, { 'message' => 'Request structure is valid', 'messageType' => 'info' }, { 'messageType' => 'info', 'message' => 'Request data is valid' }, { 'messageType' => 'info', 'message' => 'File for incoming brapi obserations saved in archive.' }, { 'message' => 'All values in your file have been successfully processed!

    0 new values stored
    0 previously stored values skipped
    1 previously stored values overwritten
    0 previously stored values removed

    The following previously uploaded files are now obsolete because all values from them were overwritten by your upload: ', 'messageType' => 'INFO' } ], 'pagination' => { 'totalPages' => 1, 'pageSize' => 10, 'totalCount' => 1, 'currentPage' => 0 }, 'datafiles' => [] } } , "PUT observations detail test"); +is_deeply($response, { 'result' => {'data' => [ { 'collector' => 'John Doe', 'observationVariableDbId' => '70741', 'germplasmName' => 'UG130133', 'additionalInfo' => undef, 'germplasmDbId' => '39243', 'observationDbId' => '737987', 'observationUnitDbId' => '39548', 'observationUnitName' => 'KASESE_TP2013_1012', 'observationVariableName' => 'dry matter content percentage', 'observationTimeStamp' => '2023-01-01T14:47:23-06:10', 'observationLevel' => 'plot', 'value' => '500', 'uploadedBy' => 'John Doe', 'externalReferences' => [ { 'referenceId' => 'doi:10.155454/200', 'referenceSource' => 'DOI' } ], 'studyDbId' => '139' } ]}, 'metadata' => { 'status' => [ { 'messageType' => 'INFO', 'message' => 'BrAPI base call found with page=0, pageSize=10' }, { 'messageType' => 'INFO', 'message' => 'Loading CXGN::BrAPI::v2::Observations' }, { 'message' => 'Request structure is valid', 'messageType' => 'info' }, { 'messageType' => 'info', 'message' => 'Request data is valid' }, { 'messageType' => 'info', 'message' => 'File for incoming brapi obserations saved in archive.' }, { 'message' => 'All values in your file have been successfully processed!

    0 new values stored
    0 previously stored values skipped
    1 previously stored values overwritten
    0 previously stored values removed

    ', 'messageType' => 'INFO' } ], 'pagination' => { 'totalPages' => 1, 'pageSize' => 10, 'totalCount' => 1, 'currentPage' => 0 }, 'datafiles' => [] } } , "PUT observations detail test"); ####### Variables $mech->get_ok('http://localhost:3010/brapi/v2/variables?pageSize=2');