diff --git a/src/lib.rs b/src/lib.rs index 16de0d94..296fd7cf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -969,31 +969,32 @@ pub async fn configure_db( // use that information to create the associated database tables, while saving constraint // information to constrains_config. let mut setup_statements = HashMap::new(); - let table_names: Vec = tables_config.keys().cloned().collect(); - for table_name in table_names { + for table_name in tables_config.keys().cloned().collect::>() { let optional_path = tables_config .get(&table_name) .and_then(|r| r.get("path")) .and_then(|p| p.as_str()); - let path; + let mut path = None; match optional_path { - // If an entry of the tables_config has no path then it is an internal table which need - // not be configured explicitly. Currently the only example is the message table. - None => continue, + None => { + // If an entry of the tables_config has no path then it is an internal table which + // need not be configured explicitly. Currently the only examples are the message + // and history tables. + if table_name != "message" && table_name != "history" { + panic!("No path defined for table {}", table_name); + } + continue; + } Some(p) if !Path::new(p).is_file() => { eprintln!("WARN: File does not exist {}", p); - continue; } Some(p) if Path::new(p).canonicalize().is_err() => { eprintln!("WARN: File path could not be made canonical {}", p); - continue; } - - Some(p) => path = p.to_string(), + Some(p) => path = Some(p.to_string()), }; - // Get the columns that have been previously configured: let defined_columns: Vec = tables_config .get(&table_name) .and_then(|r| r.get("column")) @@ -1003,65 +1004,58 @@ pub async fn configure_db( .and_then(|k| Some(k.collect())) .unwrap(); - // Get the actual columns from the data itself. Note that we set has_headers to false - // (even though the files have header rows) in order to explicitly read the header row. 
- let mut rdr = csv::ReaderBuilder::new() - .has_headers(false) - .delimiter(b'\t') - .from_reader(File::open(path.clone()).unwrap_or_else(|err| { - panic!("Unable to open '{}': {}", path.clone(), err); - })); - let mut iter = rdr.records(); - let actual_columns; - if let Some(result) = iter.next() { - actual_columns = result.unwrap(); - } else { - panic!("'{}' is empty", path); - } - // We use column_order to explicitly indicate the order in which the columns should appear - // in the table, for later reference. + // in the table, for later reference. The default is to preserve the order from the actual + // table file. If that does not exist, we use the ordering in defined_columns. let mut column_order = vec![]; - let mut all_columns: SerdeMap = SerdeMap::new(); - for column_name in &actual_columns { - let column; - if !defined_columns.contains(&column_name.to_string()) { - let mut cmap = SerdeMap::new(); - cmap.insert( - String::from("table"), - SerdeValue::String(table_name.to_string()), - ); - cmap.insert( - String::from("column"), - SerdeValue::String(column_name.to_string()), - ); - cmap.insert( - String::from("nulltype"), - SerdeValue::String(String::from("empty")), - ); - cmap.insert( - String::from("datatype"), - SerdeValue::String(String::from("text")), - ); - column = SerdeValue::Object(cmap); - } else { - column = tables_config - .get(&table_name) - .and_then(|r| r.get("column")) - .and_then(|v| v.as_object()) - .and_then(|o| o.get(column_name)) + if let Some(path) = path { + // Get the actual columns from the data itself. Note that we set has_headers to + // false (even though the files have header rows) in order to explicitly read the + // header row.
+ let mut rdr = csv::ReaderBuilder::new() + .has_headers(false) + .delimiter(b'\t') + .from_reader(File::open(path.clone()).unwrap_or_else(|err| { + panic!("Unable to open '{}': {}", path.clone(), err); + })); + let mut iter = rdr.records(); + if let Some(result) = iter.next() { + let actual_columns = result .unwrap() - .clone(); + .iter() + .map(|c| c.to_string()) + .collect::>(); + // Make sure that the actual columns found in the table file, and the columns + // defined in the column config, exactly match in terms of their content: + for column_name in &actual_columns { + column_order.push(json!(column_name)); + if !defined_columns.contains(&column_name.to_string()) { + panic!( + "Column '{}.{}' not in column config", + table_name, column_name + ); + } + } + for column_name in &defined_columns { + if !actual_columns.contains(&column_name.to_string()) { + panic!( + "Defined column '{}.{}' not found in table", + table_name, column_name + ); + } + } + } else { + panic!("'{}' is empty", path); } - column_order.push(SerdeValue::String(column_name.to_string())); - all_columns.insert(column_name.to_string(), column); } + if column_order.is_empty() { + column_order = defined_columns.iter().map(|c| json!(c)).collect::>(); + } tables_config .get_mut(&table_name) .and_then(|t| t.as_object_mut()) .and_then(|o| { - o.insert(String::from("column"), SerdeValue::Object(all_columns)); o.insert( String::from("column_order"), SerdeValue::Array(column_order), @@ -1097,9 +1091,11 @@ pub async fn configure_db( } // Sort the tables according to their foreign key dependencies so that tables are always loaded - // after the tables they depend on: - let unsorted_tables: Vec = setup_statements.keys().cloned().collect(); - let sorted_tables = verify_table_deps_and_sort(&unsorted_tables, &constraints_config); + // after the tables they depend on. 
Ignore the internal message and history tables: + let sorted_tables = verify_table_deps_and_sort( + &setup_statements.keys().cloned().collect(), + &constraints_config, + ); if *command != ValveCommand::Config || verbose { // Generate DDL for the history table: diff --git a/test/expected/table3.tsv b/test/expected/table3.tsv index 04c78efc..c0f31eda 100644 --- a/test/expected/table3.tsv +++ b/test/expected/table3.tsv @@ -1,12 +1,12 @@ -source id label type parent -MOB MOB:0000013 mobecular entity owl:Class material entity -ZOB ZOB:0000013 bar owl:Class car -JOB JOB:0000013 car owl:Class foo -SOB SOB:0000013 foo owl:Class bar -YOB YOB:0000013 mar owl:Class jafar -COB BFO:0000040 material entity owl:Class owl:Thing -CO B COB:0000013 molecular dentity owl:Class material entity -COB COB:0000013 molecular entity owl:Class material entity -COB VO:0000001 vaccine owl:Class material entity -BOB VO:0000001 vaccine owl:Class material entity -BFOBBER BFO:0000027 bazaar owl:Class barrie +source id label type parent related +MOB MOB:0000013 mobecular entity owl:Class material entity +ZOB ZOB:0000013 bar owl:Class car +JOB JOB:0000013 car owl:Class foo +SOB SOB:0000013 foo owl:Class bar +YOB YOB:0000013 mar owl:Class jafar +COB BFO:0000040 material entity owl:Class owl:Thing +CO B COB:0000013 molecular dentity owl:Class material entity +COB COB:0000013 molecular entity owl:Class material entity +COB VO:0000001 vaccine owl:Class material entity +BOB VO:0000001 vaccine owl:Class material entity +BFOBBER BFO:0000027 bazaar owl:Class barrie diff --git a/test/random_test_data/column.tsv b/test/random_test_data/column.tsv index 80268a30..f7e6a20e 100644 --- a/test/random_test_data/column.tsv +++ b/test/random_test_data/column.tsv @@ -12,7 +12,14 @@ column structure empty trimmed_line schema information for this column column description empty description a description of this column datatype datatype datatype_name primary the name of this datatype datatype parent empty datatype_name 
tree(datatype) the parent datatype +datatype transform empty word datatype condition empty datatype_condition +datatype structure empty trimmed_line +datatype description empty trimmed_text +datatype SQLite type empty datatype_name +datatype PostgreSQL type empty datatype_name +datatype RDF type empty datatype_name +datatype HTML type empty datatype_name rule table table_name rule when column column_name rule when condition datatype_condition diff --git a/test/src/column.tsv b/test/src/column.tsv index 07f38290..9c6c8256 100644 --- a/test/src/column.tsv +++ b/test/src/column.tsv @@ -12,7 +12,14 @@ column structure empty trimmed_line schema information for this column column description empty description a description of this column datatype datatype datatype_name primary the name of this datatype datatype parent empty datatype_name tree(datatype) the parent datatype +datatype transform empty word datatype condition empty datatype_condition +datatype structure empty trimmed_line +datatype description empty trimmed_text +datatype SQLite type empty trimmed_line +datatype PostgreSQL type empty trimmed_line +datatype RDF type empty trimmed_line +datatype HTML type empty datatype_name rule table table_name rule when column column_name rule when condition datatype_condition @@ -32,6 +39,7 @@ table2 bar empty text table3 source prefix from(table1.prefix) table3 id CURIE unique table3 label label primary +table3 type empty CURIE table3 parent empty label tree(label) table3 related empty trimmed_line table4 foreign_column text unique diff --git a/test/src/ontology/table3.tsv b/test/src/ontology/table3.tsv index 710e1e16..e8d75e99 100644 --- a/test/src/ontology/table3.tsv +++ b/test/src/ontology/table3.tsv @@ -1,11 +1,11 @@ -source id label type parent -MOB MOB:0000013 mobecular entity owl:Class material entity -ZOB ZOB:0000013 bar owl:Class car -JOB JOB:0000013 car owl:Class foo -SOB SOB:0000013 foo owl:Class bar -YOB YOB:0000013 mar owl:Class jafar -COB BFO:0000040 material 
entity owl:Class owl:Thing -CO B COB:0000013 molecular dentity owl:Class material entity -COB COB:0000013 molecular entity owl:Class material entity -COB VO:0000001 vaccine owl:Class material entity -BOB VO:0000001 vaccine owl:Class material entity +source id label type parent related +MOB MOB:0000013 mobecular entity owl:Class material entity +ZOB ZOB:0000013 bar owl:Class car +JOB JOB:0000013 car owl:Class foo +SOB SOB:0000013 foo owl:Class bar +YOB YOB:0000013 mar owl:Class jafar +COB BFO:0000040 material entity owl:Class owl:Thing +CO B COB:0000013 molecular dentity owl:Class material entity +COB COB:0000013 molecular entity owl:Class material entity +COB VO:0000001 vaccine owl:Class material entity +BOB VO:0000001 vaccine owl:Class material entity