Skip to content

Commit

Permalink
implement rearrangement write functions and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
schristley committed Oct 17, 2024
1 parent 48b6ce6 commit a494e51
Show file tree
Hide file tree
Showing 3 changed files with 273 additions and 28 deletions.
211 changes: 191 additions & 20 deletions lang/js/io.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,8 @@ module.exports = function(airr) {
readable.pause();

if (validate) {
try {
schema.validate_header(headers);
} catch (err) {
reject(err);
}
let errors = schema.validate_header(headers);
if (errors) return reject(errors);
}

if (header_callback) {
Expand All @@ -61,16 +58,17 @@ module.exports = function(airr) {
readable.pause();

if (validate) {
try {
schema.validate_row(row);
} catch (err) {
reject(err);
}
let errors = schema.validate_object(row);
if (errors) return reject(errors);
}

if (row_callback) {
if (isPromise(row_callback)) await row_callback(row);
else row_callback(row);
} else {
// no reason to read the rows if no callback, so just end the stream
readable.destroy();
return resolve();
}

readable.resume();
Expand All @@ -81,12 +79,123 @@ module.exports = function(airr) {
});
}

airr.create_rearrangement = function(file) {
return null;
airr.create_rearrangement = async function(filename, row_callback, fields=null, debug=false) {
if (!row_callback) {
let msg = 'Row callback function not provided.';
if (debug) console.error(msg);
throw new Error(msg);
}

var is_gz = false;
var ext = filename.split('.').pop().toLowerCase();
if (ext == 'gz') is_gz = true;

var schema = new airr.SchemaDefinition('Rearrangement');

// order fields according to spec
var field_names = schema.required.slice();
if (fields) {
var additional_fields = [];
for (let f in fields) {
if (schema.required.includes(fields[f]))
continue;
else if (schema.properties.includes(fields[f]))
field_names.push(fields[f]);
else
additional_fields.push(fields[f]);
}
field_names = field_names.concat(additional_fields);
}

return new Promise(async function(resolve, reject) {
var writable = fs.createWriteStream(filename);
if (is_gz) writable.pipe(zlib.createGunzip());

// write header
writable.write(field_names.join('\t'));
writable.write('\n');

let row = null;
if (isPromise(row_callback)) row = await row_callback(field_names);
else row = row_callback(field_names);

while (row) {
let vals = [];
for (let i = 0; i < field_names.length; ++i) {
let p = field_names[i];
if (row[p] == undefined) vals.push('');
else vals.push(row[p]);
}
writable.write(vals.join('\t'));
writable.write('\n');

if (isPromise(row_callback)) row = await row_callback(field_names);
else row = row_callback(field_names);
}

writable.end();
return resolve();
});
}

airr.derive_rearrangement = function(file) {
return null;
airr.derive_rearrangement = async function(out_filename, in_filename, row_callback, fields=null, debug=false) {
if (!row_callback) {
let msg = 'Row callback function not provided.';
if (debug) console.error(msg);
throw new Error(msg);
}

var is_gz = false;
var ext = out_filename.split('.').pop().toLowerCase();
if (ext == 'gz') is_gz = true;

// get fields from input file
var field_names = null;
var got_headers = function(h) { field_names = h; }
await airr.read_rearrangement(in_filename, got_headers, null, false, debug)
.catch(function(error) { Promise.reject(error); });

// add any additional fields
if (fields) {
var additional_fields = [];
for (let f in fields) {
if (field_names.includes(fields[f]))
continue;
else
additional_fields.push(fields[f]);
}
field_names = field_names.concat(additional_fields);
}

return new Promise(async function(resolve, reject) {
var writable = fs.createWriteStream(out_filename);
if (is_gz) writable.pipe(zlib.createGunzip());

// write header
writable.write(field_names.join('\t'));
writable.write('\n');

let row = null;
if (isPromise(row_callback)) row = await row_callback(field_names);
else row = row_callback(field_names);

while (row) {
let vals = [];
for (let i = 0; i < field_names.length; ++i) {
let p = field_names[i];
if (row[p] == undefined) vals.push('');
else vals.push(row[p]);
}
writable.write(vals.join('\t'));
writable.write('\n');

if (isPromise(row_callback)) row = await row_callback(field_names);
else row = row_callback(field_names);
}

writable.end();
return resolve();
});
}

airr.load_rearrangement = async function(filename, validate=false, debug=false) {
Expand All @@ -99,16 +208,78 @@ module.exports = function(airr) {
return Promise.resolve(rows);
}

airr.dump_rearrangement = function(file) {
return null;
airr.dump_rearrangement = async function(data, filename, fields=null, debug=false) {
var idx = 0;
var row_callback = function(field_names) {
if (idx >= data.length) return null;
else return data[idx++];
};

return airr.create_rearrangement(filename, row_callback, fields, debug);
}

airr.merge_rearrangement = function(file) {
return null;
airr.merge_rearrangement = async function(out_filename, in_filenames, drop=false, debug=false) {
var is_gz = false;
var ext = out_filename.split('.').pop().toLowerCase();
if (ext == 'gz') is_gz = true;

// gather fields from input files
var first = true;
var field_names = [];
var got_headers = function(headers) {
if (first) {
field_names = headers;
first = false;
} else {
// intersection
if (drop) field_names = field_names.filter(value => headers.includes(value));
else { // or union
for (let h in headers) {
if (!field_names.includes(headers[h])) {
field_names.push(headers[h]);
}
}
}
}
}
for (let f in in_filenames) {
await airr.read_rearrangement(in_filenames[f], got_headers, null, false, debug)
.catch(function(error) { Promise.reject(error); });
}

// write input files to output file sequentially
return new Promise(async function(resolve, reject) {
var writable = fs.createWriteStream(out_filename);
if (is_gz) writable.pipe(zlib.createGunzip());

// write header
writable.write(field_names.join('\t'));
writable.write('\n');

var got_row = function(row) {
let vals = [];
for (let i = 0; i < field_names.length; ++i) {
let p = field_names[i];
if (row[p] == undefined) vals.push('');
else vals.push(row[p]);
}
writable.write(vals.join('\t'));
writable.write('\n');
}

for (let f in in_filenames) {
await airr.read_rearrangement(in_filenames[f], null, got_row, false, debug)
.catch(function(error) { Promise.reject(error); });
}

writable.end();
return resolve();
});
}

airr.validate_rearrangement = function(file) {
return null;
airr.validate_rearrangement = async function(filename, debug=false) {
var got_row = function(row) { };
return airr.read_rearrangement(filename, null, got_row, true, true, debug);
}

airr.read_airr = function(filename, validate=false, model=true, debug=false) {
Expand Down
9 changes: 8 additions & 1 deletion lang/js/schema.js
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,14 @@ module.exports = function(airr, schema) {
//

airr.SchemaDefinition.prototype.validate_header = function(header) {
return false;
var missing_fields = [];

// Check required fields
for (let f in this.required) {
if (!header.includes(this.required[f])) return missing_fields.push(this.required[f]);
}
if (missing_fields.length == 0) return null;
else return missing_fields;
}

airr.SchemaDefinition.prototype.validate_row = function(row) {
Expand Down
81 changes: 74 additions & 7 deletions lang/js/tests/airr.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,24 +27,91 @@ var combined_yaml = path.resolve(data_path, 'good_combined_airr.yaml')
var combined_json = path.resolve(data_path, 'good_combined_airr.json')

// Output data
var output_rearrangement_good = path.resolve(data_path, 'output_good_rearrangement.tsv')
var output_rearrangement_good_gz = path.resolve(data_path, 'output_good_rearrangement.tsv.gz')
//var output_rep = os.path.join(data_path, 'output_rep.json')
var output_good = path.resolve(data_path, 'output_data.json')
var output_good_yaml = path.resolve(data_path, 'output_data.yaml')
//var output_blank = os.path.join(data_path, 'output_blank.json')

test('load schema', async () => {
const schema = await airr.load_schema();
expect(schema).not.toBeNull();
const schema = await airr.load_schema();
expect(schema).not.toBeNull();
});

test('load good airr yaml', () => {
const data = airr.read_airr(rep_good, true);
expect(data).not.toBeNull();
const data = airr.read_airr(rep_good, true);
expect(data).not.toBeNull();
});

test('load good rearrangement tsv', () => {
const data = airr.load_rearrangement(rearrangement_good, true);
expect(data).not.toBeNull();
test('load good rearrangement tsv', async () => {
const data = await airr.load_rearrangement(rearrangement_good, true);
expect(data).not.toBeNull();
});

test('write good AIRR Rearrangement TSV', async () => {
const data = await airr.load_rearrangement(rearrangement_good, true);
expect(data).not.toBeNull();

var idx = 0;
var row_callback = function(fields) {
if (idx >= data.length) return null;
else return data[idx++];
};

await airr.create_rearrangement(output_rearrangement_good, row_callback);

const new_data = await airr.load_rearrangement(output_rearrangement_good, true);
expect(new_data).not.toBeNull();
});

test('dump good AIRR Rearrangement TSV', async () => {
const data = await airr.load_rearrangement(rearrangement_good, true);
expect(data).not.toBeNull();

await airr.dump_rearrangement(data, output_rearrangement_good);

const new_data = await airr.load_rearrangement(output_rearrangement_good, true);
expect(new_data).not.toBeNull();
});

test('derive good AIRR Rearrangement TSV', async () => {
const data = await airr.load_rearrangement(rearrangement_good, true);
expect(data).not.toBeNull();

var idx = 0;
var row_callback = function(fields) {
if (idx >= data.length) return null;
else return data[idx++];
};

await airr.derive_rearrangement(output_rearrangement_good, rearrangement_good, row_callback);

const new_data = await airr.load_rearrangement(output_rearrangement_good, true);
expect(new_data).not.toBeNull();
});

test('validate good rearrangement tsv', async () => {
let isValid = true;
const data = await airr.validate_rearrangement(rearrangement_good, true)
.catch(function(error) { console.error(error); isValid = false; });
expect(data).not.toBeNull();
expect(isValid).toBe(true);
});

test('validate bad rearrangement tsv', async () => {
let isValid = true;
const data = await airr.validate_rearrangement(rearrangement_bad, true)
.catch(function(error) { console.error(error); isValid = false; });
expect(isValid).toBe(false);
});

test('merge good AIRR Rearrangement TSV', async () => {
var in_files = [ rearrangement_good, rearrangement_good, rearrangement_good];
await airr.merge_rearrangement(output_rearrangement_good, in_files, false, true);

const new_data = await airr.load_rearrangement(output_rearrangement_good, true);
expect(new_data).not.toBeNull();
});

test('write good AIRR DataFile', () => {
Expand Down

0 comments on commit a494e51

Please sign in to comment.