diff --git a/lang/js/io.js b/lang/js/io.js index bbc60469..82619214 100644 --- a/lang/js/io.js +++ b/lang/js/io.js @@ -43,11 +43,8 @@ module.exports = function(airr) { readable.pause(); if (validate) { - try { - schema.validate_header(headers); - } catch (err) { - reject(err); - } + let errors = schema.validate_header(headers); + if (errors) return reject(errors); } if (header_callback) { @@ -61,16 +58,17 @@ module.exports = function(airr) { readable.pause(); if (validate) { - try { - schema.validate_row(row); - } catch (err) { - reject(err); - } + let errors = schema.validate_object(row); + if (errors) return reject(errors); } if (row_callback) { if (isPromise(row_callback)) await row_callback(row); else row_callback(row); + } else { + // no reason to read the rows if no callback, so just end the stream + readable.destroy(); + return resolve(); } readable.resume(); @@ -81,12 +79,123 @@ module.exports = function(airr) { }); } - airr.create_rearrangement = function(file) { - return null; + airr.create_rearrangement = async function(filename, row_callback, fields=null, debug=false) { + if (!row_callback) { + let msg = 'Row callback function not provided.'; + if (debug) console.error(msg); + throw new Error(msg); + } + + var is_gz = false; + var ext = filename.split('.').pop().toLowerCase(); + if (ext == 'gz') is_gz = true; + + var schema = new airr.SchemaDefinition('Rearrangement'); + + // order fields according to spec + var field_names = schema.required.slice(); + if (fields) { + var additional_fields = []; + for (let f in fields) { + if (schema.required.includes(fields[f])) + continue; + else if (schema.properties.includes(fields[f])) + field_names.push(fields[f]); + else + additional_fields.push(fields[f]); + } + field_names = field_names.concat(additional_fields); + } + + return new Promise(async function(resolve, reject) { + var writable = fs.createWriteStream(filename); + if (is_gz) writable.pipe(zlib.createGunzip()); + + // write header + writable.write(field_names.join('\t')); + writable.write('\n'); + + let row = null; + if (isPromise(row_callback)) row = await row_callback(field_names); + else row = row_callback(field_names); + + while (row) { + let vals = []; + for (let i = 0; i < field_names.length; ++i) { + let p = field_names[i]; + if (row[p] == undefined) vals.push(''); + else vals.push(row[p]); + } + writable.write(vals.join('\t')); + writable.write('\n'); + + if (isPromise(row_callback)) row = await row_callback(field_names); + else row = row_callback(field_names); + } + + writable.end(); + return resolve(); + }); } - airr.derive_rearrangement = function(file) { - return null; + airr.derive_rearrangement = async function(out_filename, in_filename, row_callback, fields=null, debug=false) { + if (!row_callback) { + let msg = 'Row callback function not provided.'; + if (debug) console.error(msg); + throw new Error(msg); + } + + var is_gz = false; + var ext = out_filename.split('.').pop().toLowerCase(); + if (ext == 'gz') is_gz = true; + + // get fields from input file + var field_names = null; + var got_headers = function(h) { field_names = h; } + await airr.read_rearrangement(in_filename, got_headers, null, false, debug) + .catch(function(error) { Promise.reject(error); }); + + // add any additional fields + if (fields) { + var additional_fields = []; + for (let f in fields) { + if (field_names.includes(fields[f])) + continue; + else + additional_fields.push(fields[f]); + } + field_names = field_names.concat(additional_fields); + } + + return new Promise(async function(resolve, reject) { + var writable = fs.createWriteStream(out_filename); + if (is_gz) writable.pipe(zlib.createGunzip()); + + // write header + writable.write(field_names.join('\t')); + writable.write('\n'); + + let row = null; + if (isPromise(row_callback)) row = await row_callback(field_names); + else row = row_callback(field_names); + + while (row) { + let vals = []; + for (let i = 0; i < field_names.length; ++i) { + let p = field_names[i]; + if (row[p] == undefined) vals.push(''); + else vals.push(row[p]); + } + writable.write(vals.join('\t')); + writable.write('\n'); + + if (isPromise(row_callback)) row = await row_callback(field_names); + else row = row_callback(field_names); + } + + writable.end(); + return resolve(); + }); } airr.load_rearrangement = async function(filename, validate=false, debug=false) { @@ -99,16 +208,78 @@ module.exports = function(airr) { return Promise.resolve(rows); } - airr.dump_rearrangement = function(file) { - return null; + airr.dump_rearrangement = async function(data, filename, fields=null, debug=false) { + var idx = 0; + var row_callback = function(field_names) { + if (idx >= data.length) return null; + else return data[idx++]; + }; + + return airr.create_rearrangement(filename, row_callback, fields, debug); } - airr.merge_rearrangement = function(file) { - return null; + airr.merge_rearrangement = async function(out_filename, in_filenames, drop=false, debug=false) { + var is_gz = false; + var ext = out_filename.split('.').pop().toLowerCase(); + if (ext == 'gz') is_gz = true; + + // gather fields from input files + var first = true; + var field_names = []; + var got_headers = function(headers) { + if (first) { + field_names = headers; + first = false; + } else { + // intersection + if (drop) field_names = field_names.filter(value => headers.includes(value)); + else { // or union + for (let h in headers) { + if (!field_names.includes(headers[h])) { + field_names.push(headers[h]); + } + } + } + } + } + for (let f in in_filenames) { + await airr.read_rearrangement(in_filenames[f], got_headers, null, false, debug) + .catch(function(error) { Promise.reject(error); }); + } + + // write input files to output file sequentially + return new Promise(async function(resolve, reject) { + var writable = fs.createWriteStream(out_filename); + if (is_gz) writable.pipe(zlib.createGunzip()); + + // write header + writable.write(field_names.join('\t')); + writable.write('\n'); + + var got_row = function(row) { + let vals = []; + for (let i = 0; i < field_names.length; ++i) { + let p = field_names[i]; + if (row[p] == undefined) vals.push(''); + else vals.push(row[p]); + } + writable.write(vals.join('\t')); + writable.write('\n'); + } + + for (let f in in_filenames) { + await airr.read_rearrangement(in_filenames[f], null, got_row, false, debug) + .catch(function(error) { Promise.reject(error); }); + } + + writable.end(); + return resolve(); + }); } - airr.validate_rearrangement = function(file) { - return null; + airr.validate_rearrangement = async function(filename, debug=false) { + var got_row = function(row) { }; + return airr.read_rearrangement(filename, null, got_row, true, true, debug); } airr.read_airr = function(filename, validate=false, model=true, debug=false) { diff --git a/lang/js/schema.js b/lang/js/schema.js index 66326d5a..541dec6f 100644 --- a/lang/js/schema.js +++ b/lang/js/schema.js @@ -243,7 +243,14 @@ module.exports = function(airr, schema) { // airr.SchemaDefinition.prototype.validate_header = function(header) { - return false; + var missing_fields = []; + + // Check required fields + for (let f in this.required) { + if (!header.includes(this.required[f])) return missing_fields.push(this.required[f]); + } + if (missing_fields.length == 0) return null; + else return missing_fields; } airr.SchemaDefinition.prototype.validate_row = function(row) { diff --git a/lang/js/tests/airr.test.js b/lang/js/tests/airr.test.js index 5dbc7801..897e5502 100644 --- a/lang/js/tests/airr.test.js +++ b/lang/js/tests/airr.test.js @@ -27,24 +27,91 @@ var combined_yaml = path.resolve(data_path, 'good_combined_airr.yaml') var combined_json = path.resolve(data_path, 'good_combined_airr.json') // Output data +var output_rearrangement_good = path.resolve(data_path, 'output_good_rearrangement.tsv') +var output_rearrangement_good_gz = path.resolve(data_path, 'output_good_rearrangement.tsv.gz') //var output_rep = os.path.join(data_path, 'output_rep.json') var output_good = path.resolve(data_path, 'output_data.json') var output_good_yaml = path.resolve(data_path, 'output_data.yaml') //var output_blank = os.path.join(data_path, 'output_blank.json') test('load schema', async () => { - const schema = await airr.load_schema(); - expect(schema).not.toBeNull(); + const schema = await airr.load_schema(); + expect(schema).not.toBeNull(); }); test('load good airr yaml', () => { - const data = airr.read_airr(rep_good, true); - expect(data).not.toBeNull(); + const data = airr.read_airr(rep_good, true); + expect(data).not.toBeNull(); }); -test('load good rearrangement tsv', () => { - const data = airr.load_rearrangement(rearrangement_good, true); - expect(data).not.toBeNull(); +test('load good rearrangement tsv', async () => { + const data = await airr.load_rearrangement(rearrangement_good, true); + expect(data).not.toBeNull(); +}); + +test('write good AIRR Rearrangement TSV', async () => { + const data = await airr.load_rearrangement(rearrangement_good, true); + expect(data).not.toBeNull(); + + var idx = 0; + var row_callback = function(fields) { + if (idx >= data.length) return null; + else return data[idx++]; + }; + + await airr.create_rearrangement(output_rearrangement_good, row_callback); + + const new_data = await airr.load_rearrangement(output_rearrangement_good, true); + expect(new_data).not.toBeNull(); +}); + +test('dump good AIRR Rearrangement TSV', async () => { + const data = await airr.load_rearrangement(rearrangement_good, true); + expect(data).not.toBeNull(); + + await airr.dump_rearrangement(data, output_rearrangement_good); + + const new_data = await airr.load_rearrangement(output_rearrangement_good, true); + expect(new_data).not.toBeNull(); +}); + +test('derive good AIRR Rearrangement TSV', async () => { + const data = await airr.load_rearrangement(rearrangement_good, true); + expect(data).not.toBeNull(); + + var idx = 0; + var row_callback = function(fields) { + if (idx >= data.length) return null; + else return data[idx++]; + }; + + await airr.derive_rearrangement(output_rearrangement_good, rearrangement_good, row_callback); + + const new_data = await airr.load_rearrangement(output_rearrangement_good, true); + expect(new_data).not.toBeNull(); +}); + +test('validate good rearrangement tsv', async () => { + let isValid = true; + const data = await airr.validate_rearrangement(rearrangement_good, true) + .catch(function(error) { console.error(error); isValid = false; }); + expect(data).not.toBeNull(); + expect(isValid).toBe(true); +}); + +test('validate bad rearrangement tsv', async () => { + let isValid = true; + const data = await airr.validate_rearrangement(rearrangement_bad, true) + .catch(function(error) { console.error(error); isValid = false; }); + expect(isValid).toBe(false); +}); + +test('merge good AIRR Rearrangement TSV', async () => { + var in_files = [ rearrangement_good, rearrangement_good, rearrangement_good]; + await airr.merge_rearrangement(output_rearrangement_good, in_files, false, true); + + const new_data = await airr.load_rearrangement(output_rearrangement_good, true); + expect(new_data).not.toBeNull(); }); test('write good AIRR DataFile', () => {