diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh index ed843e0e..28180cf1 100755 --- a/build_tools/travis/install.sh +++ b/build_tools/travis/install.sh @@ -48,7 +48,7 @@ else pip install --upgrade autograd fi - +pip install rmsd pip install cython diff --git a/build_tools/travis/test_script.sh b/build_tools/travis/test_script.sh index 217e8863..5965ac0c 100644 --- a/build_tools/travis/test_script.sh +++ b/build_tools/travis/test_script.sh @@ -32,12 +32,19 @@ run_tests() { } +run_pastis_to_pdb_tests() { + pushd examples/test_pastis_to_pdb + bash launch_tests.sh + popd +} + if [[ "$RUN_FLAKE8" == "true" ]]; then source build_tools/travis/flake8_diff.sh fi if [[ "$SKIP_TESTS" != "true" ]]; then run_tests + run_pastis_to_pdb_tests fi if [[ "$BUILD_DOC" == "true" ]] ; then diff --git a/examples/test_pastis_to_pdb/launch_tests.sh b/examples/test_pastis_to_pdb/launch_tests.sh new file mode 100755 index 00000000..7b15e8b4 --- /dev/null +++ b/examples/test_pastis_to_pdb/launch_tests.sh @@ -0,0 +1,23 @@ +# Test conversion script + +# Haploid, no interpolate +python pastis_to_pdb.py --input_file test_script/haploid.coords --output_file test_script/test1.pdb --interpolate 0 --ploidy 1 + +python test_script.py --pastis_coords_file test_script/haploid.coords --pdb_file test_script/test1.pdb --interpolate 0 --ploidy 1 + +# Haploid, interpolate +python pastis_to_pdb.py --input_file test_script/haploid.coords --output_file test_script/test1.pdb --interpolate 1 --ploidy 1 + +python test_script.py --pastis_coords_file test_script/haploid.coords --pdb_file test_script/test1.pdb --interpolate 1 --ploidy 1 + +# Diploid, no interpolate +python pastis_to_pdb.py --input_file test_script/diploid.coords --output_file test_script/test1.pdb --interpolate 0 --ploidy 2 + +python test_script.py --pastis_coords_file test_script/diploid.coords --pdb_file test_script/test1.pdb --interpolate 0 --ploidy 2 + +# Diploid, interpolate +python pastis_to_pdb.py --input_file test_script/diploid.coords --output_file test_script/test1.pdb --interpolate 1 --ploidy 2 --bed_file test_script/counts.bed + +python test_script.py --pastis_coords_file test_script/diploid.coords --pdb_file test_script/test1.pdb --interpolate 1 --ploidy 2 + +rm test_script/test1.pdb diff --git a/examples/test_pastis_to_pdb/pastis_to_pdb.py b/examples/test_pastis_to_pdb/pastis_to_pdb.py new file mode 100644 index 00000000..8c5539ac --- /dev/null +++ b/examples/test_pastis_to_pdb/pastis_to_pdb.py @@ -0,0 +1,644 @@ +from __future__ import print_function +import numpy as np +from scipy import linalg +from sklearn.metrics import euclidean_distances + +# 160907 GBonora +# Python implementation of 'print_pdb_atom' function in coords.cpp + + +def fprintf(fp, fmt, *args): + """ + Simply an alias for fprintf to allow Python implementation to remain + similar to original C version. + Parameters + ---------- + fp : output file + fmt : C-style format string + args: arguments for format string + """ + fp.write(fmt % args) + + +def print_pdb_atom(outfile, + atom_index, + is_node, + atom_name, + scale_factor, + my_coords): + """ + Prints one line to PDB file. + Parameters + ---------- + outfile : output file + atom_index : integer + 'atom' ID + is_node : boolean + Is this a node or an edge atom? + atom_name : string + eg "N", "O", or "C" + scale_factor : integer + 100 for a sphere with radius 1, 1000 for a sphere with radius 10 + my_coords : + 3D co-ordinates to ouput + """ + # prev_chrom_index = -1 + # atom_index + # if chrom_index != prev_chrom_index: + # atom_index = 1 + # prev_chrom_index = chrom_index + + # http://www.biochem.ucl.ac.uk/~roman/procheck/manual/manappb.html + fprintf(outfile, "ATOM ") # 1- 6: Record ID + fprintf(outfile, "%5d", atom_index) # 7-11: Atom serial number + fprintf(outfile, " ") # 12: Blank + # if (is_node) { # 13-16: Atom name + # fprintf(outfile, "N ") + # } else { + # fprintf(outfile, "O ") + # } + fprintf(outfile, "%s ", atom_name) + fprintf(outfile, " ") # 17-17: Alternative location code + if is_node: # 18-20: 3-letter amino acid code + fprintf(outfile, "NOD") + else: + fprintf(outfile, "EDG") + fprintf(outfile, " ") # 21: Blank + fprintf(outfile, "%c", # 22: Chain identifier code + # get_chrom_id(chrom_index, copy_index)) + 'A') + fprintf(outfile, " ") # 23-26: Residue sequence number + fprintf(outfile, " ") # 27: Insertion code + fprintf(outfile, " ") # 28-30: Blank + fprintf(outfile, "%8.3f%8.3f%8.3f", # 31-54: Atom coordinates + # (my_coords->x + 1.0) * SCALE_FACTOR, + # (my_coords->y + 1.0) * SCALE_FACTOR, + # (my_coords->z + 1.0) * SCALE_FACTOR) + (my_coords[0] + 1.0) * scale_factor, + (my_coords[1] + 1.0) * scale_factor, + (my_coords[2] + 1.0) * scale_factor) + fprintf(outfile, "%6.2f", 1.0) # 55-60: Occupancy value + if is_node: + fprintf(outfile, "%6.2f", 50.0) # 61-66: B-value (thermal factor) + else: + fprintf(outfile, "%6.2f", 75.0) # 61-66: B-value (thermal factor) + fprintf(outfile, " ") # 67: Blank + fprintf(outfile, " ") # 68-70: Blank + fprintf(outfile, "\n") + + +def writePDB(Xpdb, bead_truths, pdbfilename): + """ + Write 3D ouput from pastis to PDB file. + Parameters + ---------- + Xpdb : nparray + the 3D co-ordinates + bead_truths: boolean list indicating which coordinates are originally + output by PASTIS + pdbfilename : File to write PDB file to. + """ + # print('PDB file creation!') + atom_name = 'O' + scale_factor = 10 + # pdboutfile = open(pdbfilename,'w') + with open(pdbfilename, "w") as pdboutfile: + for coordIdx in range(0, np.shape(Xpdb)[0]): + # print coordIdx + if coordIdx == 0 or coordIdx == np.shape(Xpdb)[0]: + is_node = True + else: + is_node = False + my_coords = Xpdb[coordIdx, :] + if bead_truths is not None: + if not bead_truths[coordIdx]: + atom_name = 'H' + else: + atom_name = 'O' + print_pdb_atom(pdboutfile, + # chrom_index, + # copy_index, + coordIdx, + is_node, # Is this a node or an edge atom + atom_name, # eg "N", "O", or "C", + scale_factor, + my_coords) + # pdboutfile.close() + + +def realignment_error(X, Y, error_type): + """ + If an error occurs during realignment, processes it. + """ + if error_type.lower() == 'rmsd': + return np.sqrt(((X - Y) ** 2.).sum() / len(X)) + elif error_type.lower() == 'distanceerror': + return np.sqrt(( + (euclidean_distances(X) - euclidean_distances(Y)) ** 2.).sum()) + else: + raise ValueError('Error error_type must be rmsd or distanceerror') + + +def realign_structures(X, Y, rescale=False, copy=True, verbose=False, + error_type='rmsd'): + """ + Realigns Y and X + + Parameters + ---------- + X : ndarray (n, 3) + First 3D structure + + Y : ndarray (n, 3) + Second 3D structure + + rescale : boolean, optional, default: False + Whether to rescale Y or not. + + copy : boolean, optional, default: True + Whether to copy the data or not + + verbose : boolean, optional, default: False + The level of verbosity + + Returns + ------- + Y : ndarray (n, 3) + Realigned 3D, Xstructure + """ + if copy: + Y = Y.copy() + X = X.copy() + + mask = np.invert(np.isnan(X[:, 0]) | np.isnan(Y[:, 0])) + + if rescale: + Y, _, _, _ = realign_structures(X, Y) + if error_type.lower() == 'rmsd': + alpha = (X[mask] * Y[mask]).sum() / (Y[mask] ** 2).sum() + elif error_type.lower() == 'distanceerror': + dis_X = euclidean_distances(X[mask]) + dis_Y = euclidean_distances(Y[mask]) + alpha = (dis_X * dis_Y).sum() / (dis_Y ** 2).sum() + + Y *= alpha + + X -= np.nanmean(X, axis=0) + Y -= np.nanmean(Y, axis=0) + + K = np.dot(X[mask].T, Y[mask]) + U, L, V = linalg.svd(K) + V = V.T + + R = np.dot(V, U.T) + if linalg.det(R) < 0: + if verbose: + print("Reflexion found") + V[:, -1] *= -1 + R = np.dot(V, U.T) + Y_fit = np.dot(Y, R) + + error = realignment_error(X[mask], Y_fit[mask], error_type) + + # Check at the mirror + Y_mirror = Y.copy() + Y_mirror[:, 0] = - Y[:, 0] + + K = np.dot(X[mask].T, Y_mirror[mask]) + U, L, V = linalg.svd(K) + V = V.T + if linalg.det(V) < 0: + V[:, -1] *= -1 + + R_mirror = np.dot(V, U.T) + Y_mirror_fit = np.dot(Y_mirror, R_mirror) + error_mirror = realignment_error(X[mask], Y_mirror_fit[mask], error_type) + + if error <= error_mirror: + best_Y_fit = Y_fit + best_error = error + mirror = False + best_R = R + else: + if verbose: + print("Reflexion is better") + best_Y_fit = Y_mirror_fit + best_error = error_mirror + mirror = True + best_R = R_mirror + + return best_Y_fit, best_error, mirror, best_R + + +def find_rotation(X, Y, copy=True): + """ + Finds the rotation matrice C such that |x - Q.T Y| is minimum. + + Parameters + ---------- + X : ndarray (n, 3) + First 3D structure + + Y : ndarray (n, 3) + Second 3D structure + + copy : boolean, optional, default: True + Whether to copy the data or not + + Returns + ------- + Y : ndarray (n, 3) + Realigned 3D structure + """ + if copy: + Y = Y.copy() + X = X.copy() + mask = np.invert(np.isnan(X[:, 0]) | np.isnan(Y[:, 0])) + K = np.dot(X[mask].T, Y[mask]) + U, L, V = np.linalg.svd(K) + V = V.T + + t = np.array([[1, 0, 0], + [0, 1, 0], + [0, 0, np.linalg.det(np.dot(V, U.T))]]) + R = np.dot(V, np.dot(t, U.T)) + Y_fit = np.dot(Y, R) + X_mean = X[mask].mean() + Y_fit -= Y_fit[mask].mean() - X_mean + + # Check at the mirror + Y_mirror = Y.copy() + Y_mirror[:, 0] = - Y[:, 0] + + K = np.dot(X[mask].T, Y_mirror[mask]) + U, L, V = np.linalg.svd(K) + V = V.T + + t = np.array([[1, 0, 0], + [0, 1, 0], + [0, 0, np.linalg.det(np.dot(V, U.T))]]) + R_ = np.dot(V, np.dot(t, U.T)) + Y_mirror_fit = np.dot(Y_mirror, R_) + Y_mirror_fit -= Y_mirror[mask].mean() - X_mean + return R + + +def distance_between_structures(X, Y): + """ + Computes the distances per loci between structures + + Parameters + ---------- + X : ndarray (n, l) + First 3D structure + + Y : ndarray (n, l) + Second 3D structure + + Returns + ------- + distances : (n, ) + Distances between the 2 structures + """ + if X.shape != Y.shape: + raise ValueError("Shapes of the two matrics need to be the same") + + return np.sqrt(((X - Y) ** 2).sum(axis=1)) + + +def _round_struct_for_pdb(struct, max_num_char=8): + """ + Rounds the structure for PDB + """ + struct = np.asarray(struct) + struct_abs = np.where( + np.isfinite(struct) & (struct != 0), + np.abs(struct), 10 ** (max_num_char - 1)) + round_factor = 10 ** (max_num_char - 1 - np.floor(np.log10(struct_abs))) + round_factor = np.where( + np.floor(np.log10(struct_abs)) < max_num_char, + round_factor / 10, round_factor) + return np.round(struct * round_factor) / round_factor + + +def _resize_struct_for_pdb(struct, max_num_char=8): + """ + Resizes the structure for PDB + """ + max_coord_range = (struct.max(axis=0) - struct.min(axis=0)).max() + resize_factor = 10 ** (max_num_char - 1 - np.floor( + np.log10(max_coord_range))) + resized_struct = struct * resize_factor + resized_struct -= resized_struct.min(axis=0) + return resized_struct + + +def realign_struct_for_plotting(struct_to_plot, struct_to_match, + rescale=False): + """ + Realigns the structure for plotting + """ + + from rmsd import kabsch + struct_to_plot = struct_to_plot.copy() + struct_to_match = struct_to_match.copy() + struct_to_plot -= np.nanmean(struct_to_plot, axis=0) + struct_to_match -= np.nanmean(struct_to_match, axis=0) + mask = np.invert( + np.isnan(struct_to_plot[:, 0]) | np.isnan(struct_to_match[:, 0])) + struct_to_plot, _, _, _ = realign_structures( + struct_to_match, struct_to_plot, rescale=rescale, error_type='rmsd') + struct_to_plot[mask] = np.dot( + struct_to_plot[mask], kabsch( + struct_to_plot[mask], struct_to_match[mask])) + return struct_to_plot + + +def interpolate_chromosomes(X, lengths, eps=1e-1, smooth=0): + from scipy import interpolate + """ + Return a smoothed interpolation of the chromosomes + + Parameters + ---------- + X : ndarray (n, 3) + The 3D structure + + lengths : ndarray (L, ) + The lengths of the chromosomes. Note that the sum of the lengths + should correspond to the length of the ndarray of the 3D structure. + + Returns + ------- + smoothed_X : the smoothed 3D structure, with a set of coordinates for + each chromosome (ie, the first chromosome's points will be + smoothed_X[0]). smoothed_X[0][i] will be a list with an x, y, and + z coordinate. + + """ + + smoothed_X = [[], []] + + mask = np.invert(np.isnan(X[:, 0])) + + begin, end = 0, 0 + + enumerated = enumerate(lengths) + + for i, length in enumerated: + end += length + x = X[begin:end, 0] + y = X[begin:end, 1] + z = X[begin:end, 2] + indx = mask[begin:end] + + if not len(x): + break + + if not indx[0]: + x[0] = x[indx][0] + x[indx][0] = np.nan + y[0] = y[indx][0] + y[indx][0] = np.nan + + z[0] = z[indx][0] + z[indx][0] = np.nan + + if not indx[-1]: + x[-1] = x[indx][-1] + x[indx][-1] = np.nan + y[-1] = y[indx][-1] + z[indx][-1] = np.nan + z[-1] = z[indx][-1] + z[indx][-1] = np.nan + + indx = np.invert(np.isnan(x)) + + m = np.arange(len(x))[indx] + + f_x = interpolate.Rbf(m, x[indx], smooth=smooth) + f_y = interpolate.Rbf(m, y[indx], smooth=smooth) + f_z = interpolate.Rbf(m, z[indx], smooth=smooth) + + for j in range(length - 1): + if (j < length - 2): + m = np.arange(j, j + 1 + 0.1, 0.1) + else: + m = np.arange(j, j + 1, 0.1) + smoothed_X[i].append([f_x(np.arange(m.min(), m.max(), 0.1)), + f_y(np.arange(m.min(), m.max(), 0.1)), + f_z(np.arange(m.min(), m.max(), 0.1))]) + + begin = end + + return smoothed_X + + +def combine_structs(X_beads, X_interp, lengths): + """ + Combines X_beads and X_interp + Parameters + ---------- + X_beads: the original coordinate beads of the structure + + X_interp: the interpolated coordinates of X_beads + + Returns + ------- + result: list + a list that is a combination of the coordinates of X_beads and + X_interp, where in between in each coordinate of X_beads, the + corresponding interpolated coordinates from X_interp are + + result_truth: list + a list of size result, where result_truth[i] = True iff + result[i] is a coordinate from X_beads + """ + + result = [] + result_truth = [] + for i in range(len(lengths)): + curr_length = lengths[i] + beadset = X_beads[i * curr_length:i * curr_length + curr_length] + for j in range(curr_length - 1): + bead = beadset[j] + curr_interp = X_interp[i][j] + xs, ys, zs = curr_interp[0], curr_interp[1], curr_interp[2] + result.append(bead) + result_truth.append(True) + for k in range(len(xs)): + result.append([xs[k], ys[k], zs[k]]) + result_truth.append(False) + result.append(beadset[-1]) + result_truth.append(True) + return result, result_truth + + +def main(): + """ + Load all input data and convert the given input file to a PDB file of the + given output file name. If interpolate is 1, interpolates coordinates + between the coordinates in the input file. The raw PASTIS coordinates are + output as oyxgen atoms, while the interpolated coordinates are output as + hydrogen atoms. In the case where we are interpolating a diploid structure + ("interpolate diploid case"), either lengths or a .bed file must be + provided. + + Parameters + ---------- + input_file : str + Name of the input coordinate file; it should be the coordinates of the + 3d structure output by PASTIS. + output_file: str + Name of the output PDB file + interpolate: int + If 1, interpolates between the coordinate beads in the input file. + If 0, does not. + ploidy: int + If 1, considers the structure to be haploid. + If 2, considers the structure to be diploid. + lengths: int int + Length of each homolog. Only used in the interpolate diploid case, in + which case either --lengths or --bed_file, but not both, must be + provided. Format: length1 length2 + bed_file: str + The .bed file that was input to PASTIS to generate the coordinates in + --input_file. Used here to find the length of each homolog only in the + interpolate diploid case, in which case either --lengths or --bed_file, + but not both, must be provided. + """ + + # Parse the arguments + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("--input_file", type=str, required=True, + help="input file name, should be the structure " + + "output by running PASTIS") + parser.add_argument('--output_file', type=str, required=True, + help='output file name, ie ') + parser.add_argument('--interpolate', type=int, required=True, + help="if 1, interpolates between coordinate beads. " + + "if 0, does not.") + parser.add_argument('--ploidy', type=int, required=True, + help='1 for haploid, 2 for diploid') + parser.add_argument('--lengths', type=int, nargs="+", required=False, + default=None, + help="length of each homolog. only used in the " + + "interpolate diploid case, in which case either " + "--lengths or --bed_file, but not both, must " + + "be provided. format: length1 length2") + parser.add_argument('--bed_file', type=str, required=False, default=None, + help=".bed file input to PASTIS to generate the " + + "coordinates in --input_file. used here to " + + "find the length of each homolog only in the " + + "interpolate diploid case, in which case " + + "either --lengths or --bed_file, but not " + + "both, must be provided.") + args = parser.parse_args() + + # Load the structure + struct_coords = np.loadtxt(args.input_file, delimiter=" ") + + # Check the length + if (len(struct_coords) > 10000): + raise ValueError('structure can be at most 10000 beads') + + # Make sure valid ploidy value + if (args.ploidy != 1) and (args.ploidy != 2): + raise ValueError('--ploidy must be 1 (haploid) or 2 (diploid)') + + # Check if we are interpolating + if args.interpolate == 1: + # Check if diploid or haploid case + if args.ploidy == 2: + # Diploid + + # Make sure only one of --lengths or --bed_file was provided + if (args.lengths is not None) and (args.bed_file is not None): + raise ValueError('provided both --lengths or --bed_file ' + + '(one, but not both, must be provided ' + + 'in the interpolate diploid case)') + + # --lengths case + lengths = args.lengths + if (lengths is not None): + if (len(lengths) != 2): + # Not correct number of lengths provided in --lengths + raise ValueError('--lengths requires 2 lengths to be ' + + 'provided') + + # Check to make sure the values in --lengths are okay + elif (lengths[0] + lengths[1] != len(struct_coords)): + raise ValueError('invalid lengths values provided ' + + 'through --lengths') + + # --bed_file case + elif (args.bed_file is not None): + # Get the lengths using the bed file + bed_file = open(args.bed_file).read().splitlines() + homolog_len = len(bed_file) + lengths = [homolog_len, homolog_len] + + # Check to make sure the lengths are okay + if (lengths[0] + lengths[1] != len(struct_coords)): + raise ValueError('invalid lengths values provided ' + + 'through --bed_file') + + else: + # The user did not supply --lengths or --bed_file + raise ValueError('did not provide --lengths or --bed_file ' + + 'in interpolate diploid case') + else: + # Haploid + lengths = [len(struct_coords)] + + # Interpolate coordinates + struct_interpolated = np.array(interpolate_chromosomes(struct_coords, + lengths)) + + # Combine interpolated and original beads + struct_coords, struct_truths = combine_structs(struct_coords, + struct_interpolated, + lengths) + + else: + # Don't interpolate + struct_truths = None + + # Realign the structure + struct_realigned = realign_struct_for_plotting(struct_coords, + struct_coords) + + # Round the structure + struct_round = _round_struct_for_pdb(struct_realigned) + + # Write the structure to a PDB file + writePDB(struct_round, struct_truths, args.output_file) + + # Load the PDB file again + pdb_file = open(args.output_file, 'r').read().split('\n') + + # Fine which lines are nan + bad_lines = [] + for i in range(len(pdb_file)): + tokens = pdb_file[i].split() + true_line = [] + for j in range(len(tokens)): + if ((j >= 5) & (j <= 7)): + token = tokens[j] + if (str(token) != 'nan'): + true_line.append(token) + else: + true_line.append(tokens[j]) + if len(true_line) != len(tokens): + bad_lines.append(i) + + # Print out the not nan lines + with open(args.output_file, 'w') as pdb_output_file: + for i in range(len(pdb_file)): + if i not in bad_lines: + print(pdb_file[i], file=pdb_output_file) + + +if __name__ == "__main__": + main() diff --git a/examples/test_pastis_to_pdb/test_script.py b/examples/test_pastis_to_pdb/test_script.py new file mode 100644 index 00000000..bb43e4cd --- /dev/null +++ b/examples/test_pastis_to_pdb/test_script.py @@ -0,0 +1,117 @@ +import numpy as np +from pastis_to_pdb import interpolate_chromosomes, combine_structs + + +def convert_back(pdb_file): + """ + Loads the PDB file and returns the coordinates in a numpy array. + Parameters + ---------- + pdb_file: the full file name of the PDB file to load + + Returns + ------- + result: numpy array + an array of the coordinates from the input PDB file + """ + lines = open(pdb_file).read().splitlines() + converted = [] + for line in lines: + line = line.split() + if len(line) != 0: + converted.append((float(line[5]), float(line[6]), float(line[7]))) + return np.array(converted) + + +def remove_nans(curr_struct): + """ + Removes any coordinate triples that have a NAN coordinate. Returns the + coordinates. + Parameters + ---------- + curr_struct: the coordinates to remove NANs from; should be a numpy array. + + Returns + ------- + + result: numpy array + an array of the coordinates frmo curr_struct, with any coordinate + triples containing a NAN coordinate removed.e + """ + result = [] + for coords in curr_struct: + skip = False + for coord in coords: + skip = np.isnan(coord) + if (skip): + continue + else: + result.append(coords) + result = np.array(result) + return result + + +def main(): + """ + Loads the PASTIS coordinates and the PDB version of those pastis + coordinates. Centers the PDB coordinates around the origin and scales them + back down. Asserts that the PASTIS coordinates and the modified PDB + coordinates are equal to each other to one decimal place. + + Parameters + ---------- + pastis_coords_file : str + Name of the pastis coordinates file; it should be the coordinates of + 3d structure output by PASTIS. + pdb_file: str + Name of the PDB file (the pdb version of the pastis file). + interpolate: int + if 1, interpolates coordinates between the PASTIS coordinates. If 0, + does not. + """ + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("--pastis_coords_file", type=str, required=True, + help="coordinates of the pastis structure") + parser.add_argument("--pdb_file", type=str, required=True, + help="pdb file version of pastis_coords_file") + parser.add_argument("--interpolate", type=int, required=True, + help="if 1, interpolates coordinates between the" + "PASTIS coordinates. If 0, does not.") + parser.add_argument("--ploidy", type=int, required=True, + help="if 1, haploid. If 2, diploid.") + args = parser.parse_args() + + # Load the two files + struct_original = np.loadtxt(args.pastis_coords_file) + struct_pdb = convert_back(args.pdb_file) + + # If we are interpolating coordinates + if (args.interpolate == 1): + # Get the lengths + lengths = [len(struct_original)] + if (args.ploidy == 2): + half_length = int(len(struct_original) / 2) + lengths = [half_length, half_length] + + # Interpolate the coordinates + struct_interpolated = interpolate_chromosomes(struct_original, + lengths) + # Combine interpolated and original beads + struct_original, struct_truths = combine_structs(struct_original, + struct_interpolated, + lengths) + + # Recenter and scale the PDB coordinates (scale factor is 10) + struct_pdb_recentered = (struct_pdb - np.mean(struct_pdb, axis=0)) / 10 + struct_original = remove_nans(struct_original) + + # Assert they are equal to one decimal place + np.testing.assert_array_almost_equal(struct_original, + struct_pdb_recentered, + decimal=1) + + +if __name__ == "__main__": + main() diff --git a/examples/test_pastis_to_pdb/test_script/counts.bed b/examples/test_pastis_to_pdb/test_script/counts.bed new file mode 100644 index 00000000..380ca958 --- /dev/null +++ b/examples/test_pastis_to_pdb/test_script/counts.bed @@ -0,0 +1,60 @@ +Chr01 1 1 0 +Chr01 2 2 1 +Chr01 3 3 2 +Chr01 4 4 3 +Chr01 5 5 4 +Chr01 6 6 5 +Chr01 7 7 6 +Chr01 8 8 7 +Chr01 9 9 8 +Chr01 10 10 9 +Chr01 11 11 10 +Chr01 12 12 11 +Chr01 13 13 12 +Chr01 14 14 13 +Chr01 15 15 14 +Chr01 16 16 15 +Chr01 17 17 16 +Chr01 18 18 17 +Chr01 19 19 18 +Chr01 20 20 19 +Chr01 21 21 20 +Chr01 22 22 21 +Chr01 23 23 22 +Chr01 24 24 23 +Chr01 25 25 24 +Chr01 26 26 25 +Chr01 27 27 26 +Chr01 28 28 27 +Chr01 29 29 28 +Chr01 30 30 29 +Chr01 31 31 30 +Chr01 32 32 31 +Chr01 33 33 32 +Chr01 34 34 33 +Chr01 35 35 34 +Chr01 36 36 35 +Chr01 37 37 36 +Chr01 38 38 37 +Chr01 39 39 38 +Chr01 40 40 39 +Chr01 41 41 40 +Chr01 42 42 41 +Chr01 43 43 42 +Chr01 44 44 43 +Chr01 45 45 44 +Chr01 46 46 45 +Chr01 47 47 46 +Chr01 48 48 47 +Chr01 49 49 48 +Chr01 50 50 49 +Chr01 51 51 50 +Chr01 52 52 51 +Chr01 53 53 52 +Chr01 54 54 53 +Chr01 55 55 54 +Chr01 56 56 55 +Chr01 57 57 56 +Chr01 58 58 57 +Chr01 59 59 58 +Chr01 60 60 59 diff --git a/examples/test_pastis_to_pdb/test_script/diploid.coords b/examples/test_pastis_to_pdb/test_script/diploid.coords new file mode 100644 index 00000000..b7f3d797 --- /dev/null +++ b/examples/test_pastis_to_pdb/test_script/diploid.coords @@ -0,0 +1,120 @@ +-5.208853134670874407e-01 -1.916153170913326154e-01 -1.495565420952985336e-01 +-1.396916459847785230e+00 -2.543050450799024809e-01 -7.725659554014836505e-01 +-3.961177449958663832e-01 -4.968302155934049558e-01 -4.581663173381160248e-01 +-8.004677135270739097e-01 3.274484469088619759e-02 -1.304012875019021189e+00 +-1.797421897199871665e+00 3.220055834276838724e-01 -1.017467891554363346e+00 +-1.087213874584717743e+00 -3.140201628035778070e-01 -5.166627768029737888e-01 +-1.598339091538411583e-01 2.331817027912570228e-01 -5.216266863463668857e-01 +-1.176985620539124477e+00 4.766167508728956226e-01 -7.778299740140676155e-01 +-2.185020466205858547e+00 2.596514036969646000e-01 -4.672671237038418246e-01 +-2.122446746188366706e+00 -6.021333633403023189e-01 -1.110189861097058639e+00 +-1.837873013048856086e+00 3.593891862432483930e-01 -1.502204938337196483e+00 +-1.768087371650254980e+00 -6.776712046014693458e-01 -1.783666211922497302e+00 +-1.513664549198597831e+00 2.096571019628297727e-01 -2.337743368975357061e+00 +-8.020262524220216349e-01 -5.494562382951752033e-01 -2.060870946991701036e+00 +-4.296500922880354678e-01 3.647222090028793273e-01 -1.630515364238846576e+00 +-1.115969002648725006e+00 4.922847037390964964e-01 -2.450353447398896378e+00 +-1.375816060575114630e+00 -4.598078504716991977e-01 -2.019843212894297135e+00 +-2.296796338196372300e+00 -1.252428421181617513e-01 -1.573517756543486357e+00 +-2.246969283258343442e+00 -1.132595700622607726e+00 -1.197371809638305695e+00 +-3.084306116469640813e+00 -6.670002544575796000e-01 -1.689120783198301279e+00 +-3.068366691372069521e+00 3.782053331852310851e-01 -1.947461714663533572e+00 +-3.859375077786392438e+00 4.537181604536184576e-01 -1.221174594057713003e+00 +-3.156261384920139967e+00 -3.578400157336619758e-01 -1.304063309016408878e+00 +-2.235395168051053094e+00 -4.919453538876497056e-01 -1.845409562268156778e+00 +-2.984158601310353820e+00 -2.977345734668527255e-01 -2.594938344620881576e+00 +-3.701082612338077560e+00 4.219470575453862504e-01 -2.952387421537238321e+00 +-3.746934639308389769e+00 1.077831721236576445e+00 -2.098955254215446153e+00 +-3.067423267130151565e+00 5.096070639501154576e-01 -1.486731714429064155e+00 +-2.576558152802477686e+00 -4.345005125701896231e-01 -1.650017766974696043e+00 +-1.666054165140090637e+00 -6.826233703939313946e-01 -1.130951069690436350e+00 +-2.510858731351772288e+00 -9.426596934809944317e-01 -5.154024163088413157e-01 +-3.327167133786903541e+00 -4.964916863189814955e-01 -1.057881531795830776e+00 +-4.259612075032586098e+00 -4.573106485220695960e-01 -1.595269799341316741e+00 +-3.920651838542395407e+00 -1.478717898045747825e+00 -1.629829826313769603e+00 +-4.124274925947099213e+00 -9.136934822266455924e-01 -7.360763321256645897e-01 +-3.252639896060783187e+00 -1.518129868700552443e+00 -9.213559994384133978e-01 +-2.479058668185873504e+00 -2.079169954264836306e+00 -1.417634727963814845e+00 +-2.826143836554169120e+00 -1.477255805988249238e+00 -2.240210333172230506e+00 +-3.719007761979332916e+00 -8.747862694889981805e-01 -2.242649054765399441e+00 +-3.199266015396405649e+00 -1.817059530367511000e+00 -2.215152579245078357e+00 +-3.196078290894914353e+00 -1.064457775912679649e+00 -1.444800388774420075e+00 +-3.668463018825403932e+00 -1.599964131068310147e+00 -2.250480094694093491e+00 +-3.119711715690980380e+00 -6.903852754267736369e-01 -2.071852755284977921e+00 +-3.777911346474404297e+00 -7.391729663681204654e-01 -2.922908640371406186e+00 +-3.614438425064168570e+00 -1.757377166743722796e+00 -3.232674634791011137e+00 +-3.190415182406316053e+00 -7.866342053497473730e-01 -3.426062762441223875e+00 +-2.947774465925725895e+00 1.995483889420454279e-02 -2.755100983144788795e+00 +-3.437782801121912257e+00 4.989480036503125548e-01 -3.585765173522214244e+00 +-2.644873122186556014e+00 5.457999939427156111e-01 -2.858889003964611319e+00 +-2.991604745109091468e+00 5.917890672989616352e-01 -3.877170197128203544e+00 +-3.123682824301441840e+00 1.467081963277471646e+00 -3.264005017838869094e+00 +-2.655149596308459259e+00 5.655654707337834930e-01 -3.620739135722607305e+00 +-3.359786617240536621e+00 1.049355373712100414e+00 -2.965900163304951320e+00 +-3.607935939090953337e+00 7.650047630226911033e-02 -2.575513730799089185e+00 +-3.022638574282096702e+00 9.762856427674132531e-01 -2.657807930162792598e+00 +-3.900184233345724394e+00 5.580238086852178370e-01 -2.193646721390379639e+00 +-4.894625427675736162e+00 9.508608666177020829e-01 -2.321534481653122928e+00 +-4.103065283802749263e+00 2.214813697724794250e-01 -2.291099185641082059e+00 +-4.823464305624570514e+00 -3.201518323420251000e-01 -2.880336853104311867e+00 +-5.272420555967526568e+00 -1.054774287121509335e+00 -2.233593420529385032e+00 +3.203922999039826358e+00 8.520254058508340833e-01 1.041990759724584104e+00 +2.951228467066652872e+00 -8.600106779878136098e-02 1.506022062188580257e+00 +3.560868164392522406e+00 4.873997185907953944e-01 8.285293778680262733e-01 +4.099590629693546262e+00 -4.391869007870055630e-01 9.312367059522901336e-01 +3.182830815164759741e+00 -4.079360121006886386e-01 3.670716627658283659e-01 +2.727441526410844030e+00 5.230591086255640398e-01 6.600425151599397244e-01 +3.024311802801748605e+00 -4.648814211327079726e-01 9.679330361837799046e-01 +3.417580456563374280e+00 3.918537751871445929e-01 4.469275474151706007e-01 +3.126931280278107739e+00 1.404960221524669262e+00 2.251443284845202897e-01 +2.279719744821290028e+00 1.877639173829531938e+00 6.929928267156235666e-01 +1.709342764125355574e+00 1.693174616806388810e+00 1.587884797578597462e+00 +2.087853825469981661e+00 1.172726322940981447e+00 2.451410815092720963e+00 +2.389210469545921978e+00 2.524632314625817608e-01 2.922827397684671258e+00 +2.755186730294687081e+00 -7.564793891386389912e-01 2.833050605215157614e+00 +3.232374611572248924e+00 -7.585500261424138468e-01 1.867627151716960698e+00 +2.723934407197656338e+00 -1.295836172336450876e+00 2.649889809473836788e+00 +2.953954591659949447e+00 -1.536646378569096472e+00 1.625750139918564541e+00 +3.001090178897144689e+00 -8.974811037346315246e-01 7.601280587183206761e-01 +2.328603382672059574e+00 -6.218972916272996754e-02 8.546247790923177101e-01 +1.446847346251367350e+00 -5.243121306299024509e-01 1.264987132491187438e+00 +2.341313347794161004e+00 -7.585164580129140077e-01 1.816615008076759130e+00 +3.278736973131680443e+00 -6.169382857570714362e-01 1.305996717461024303e+00 +2.219241052233759337e+00 -7.408289658688665957e-01 1.158716130906970454e+00 +2.121910733977840025e+00 2.256708875780618628e-01 1.622815189758014309e+00 +1.436817634767435514e+00 1.807091397263096977e-01 7.929755087368967059e-01 +2.195479296435107752e+00 5.986821942179486600e-01 1.432635371396085766e+00 +2.797803984948309175e+00 1.491059653362879045e+00 1.455945158933031225e+00 +2.520790565109315651e+00 5.473870908685836145e-01 1.017523819722242706e+00 +2.115368102777010151e+00 9.012274047459497076e-03 1.779112303272251394e-01 +2.207036401836481865e+00 1.023877787765010039e+00 5.264415077326575210e-01 +1.853256631636952534e+00 1.135508151443228586e+00 1.537412482859702001e+00 +1.222239640831090268e+00 2.710072937641194724e-01 1.655363534529834979e+00 +9.019467607978766122e-01 -4.577924163906535449e-01 2.380270318195134571e+00 +-3.824630672614188348e-02 -8.598383246899179988e-02 2.009840159986565844e+00 +-1.244776501558310655e-01 9.799710841677010675e-01 2.135380332458139740e+00 +8.520785831122031917e-01 1.432998694591224753e+00 2.105528463264249250e+00 +2.507799992262244038e-01 6.023222450328519217e-01 1.776853660197159623e+00 +6.227669128480434191e-01 -1.564248914864144469e-01 2.444083066018173156e+00 +8.391038501592347210e-01 8.917376998747338357e-01 2.324544949769425273e+00 +9.680077911131799162e-01 1.024759166540913924e+00 1.263777167353690389e+00 +1.162142376947673084e+00 5.475212501582154001e-01 2.209435775081535880e+00 +1.440789207406675709e+00 -4.695187131458835039e-01 1.990892689974249619e+00 +2.379699094299200457e+00 5.587359681776238318e-02 1.943823453165138604e+00 +3.390082526086994896e+00 2.076661932191445348e-01 2.284516352684361262e+00 +4.153506051609712202e+00 -5.146111332199052590e-01 2.519167832291961062e+00 +4.502064793820728106e+00 4.920082029037855520e-01 2.675893341762277444e+00 +3.481268400665161522e+00 8.199344465138230786e-01 2.575568701198441612e+00 +4.380324891608275095e+00 1.170952226109174710e+00 2.098152582184081183e+00 +5.267472567866281707e+00 5.621156379396153868e-01 2.138241219565693463e+00 +6.256458282012484773e+00 7.533957580218412220e-01 1.757621096683109618e+00 +5.521179422987196439e+00 1.008147019982629455e+00 2.501925140785793644e+00 +4.759997356439670924e+00 1.656558206068004191e+00 2.901571685403505629e+00 +4.072554040456449087e+00 9.058963125598251676e-01 3.252875281650634776e+00 +5.139859909499770119e+00 8.061839960019379081e-01 3.354957730714550834e+00 +4.552126753318107788e+00 8.342049863540533794e-01 2.453031187239215871e+00 +4.918203427802766647e+00 -1.399946924676670756e-01 2.729629682833051074e+00 +4.787079954548450367e+00 1.684893763151441570e-01 3.752906115010139931e+00 +4.311693962710378969e+00 7.926911657915706177e-01 3.015506437971605003e+00 +4.517249844558161698e+00 -2.639664325080061658e-01 3.044055610625294062e+00 +3.769538417088726057e+00 2.051830552169770228e-01 3.660771755983833309e+00 diff --git a/examples/test_pastis_to_pdb/test_script/haploid.coords b/examples/test_pastis_to_pdb/test_script/haploid.coords new file mode 100644 index 00000000..a412c818 --- /dev/null +++ b/examples/test_pastis_to_pdb/test_script/haploid.coords @@ -0,0 +1,283 @@ +-3.015663468591126550e-01 7.582749767439871391e-01 -6.627330921142997600e-01 +-8.019117105970376969e-01 7.674670478224880910e-01 -6.999619080828901607e-01 +-5.807666159496088776e-01 6.481915397248664279e-01 -8.141992526995290858e-01 +-3.337744899795544451e-01 7.048711640493231512e-01 -1.035032696527414142e+00 +-3.319615901098428301e-01 7.743577398277038393e-01 -1.325933747752239933e+00 +-6.524563613441245469e-01 7.244010472602681050e-01 -1.184428769844394713e+00 +-6.035321318894502074e-01 1.001315968606936169e+00 -1.231426199496924223e+00 +-7.435311685679782956e-01 1.041169969813791951e+00 -9.566682015190985933e-01 +nan nan nan +-5.127322539489418407e-01 1.081484653430210496e+00 -6.444316132665709462e-01 +-3.550253943847156979e-01 1.111512800489149022e+00 -9.132710615119935538e-01 +-5.277142357905771908e-01 1.402847604586016761e+00 -7.946930607901567800e-01 +-6.048209160533812856e-01 1.386958232514384548e+00 -1.090478879890490527e+00 +-3.324233172048184914e-01 1.462495875161176739e+00 -1.112318945219213484e+00 +-2.501558453237907598e-01 1.246865000339121288e+00 -1.290880840774567551e+00 +8.933056827127119383e-04 1.086574340816997220e+00 -1.235293325773928030e+00 +2.348258837706034344e-02 1.110344510691711806e+00 -9.232773634358737436e-01 +5.466809948192328378e-02 1.444586829774868519e+00 -1.138437540273701476e+00 +3.162365283865799515e-01 1.351131275632892814e+00 -9.848540134374184074e-01 +3.086029045500543466e-01 1.671371368544431135e+00 -9.001137626648673118e-01 +9.022248838828336726e-02 1.508318996693548186e+00 -7.435910060796795396e-01 +-9.566123668370629662e-02 1.329770879229292380e+00 -6.080336264566733595e-01 +-2.327003290995245111e-01 1.642669950725151429e+00 -6.511087112092720508e-01 +-5.645997241497074937e-02 1.827534981716912199e+00 -8.310973366007067265e-01 +1.578883136181656877e-01 1.875785126713519935e+00 -6.074015978207274458e-01 +-4.540669047646174461e-02 1.806881954174275107e+00 -4.143100958727871097e-01 +4.819879376500917773e-02 1.507725219794465943e+00 -3.405794654044872471e-01 +2.314410642057695144e-01 1.412743354646598837e+00 -1.124615918239932622e-01 +3.240922707817941451e-01 1.719725581067918396e+00 -2.027865586347261662e-01 +4.510371943466214284e-01 1.648735404664454895e+00 -4.680156903336656904e-01 +5.745140905350147875e-01 1.459620571304694803e+00 -2.467574882735953379e-01 +7.781416599314615601e-01 1.537138866196519071e+00 -4.652816549541513025e-01 +6.007791107590398916e-01 1.340152094453282317e+00 -6.142915108306896688e-01 +3.212728106836998387e-01 1.252225559377613928e+00 -4.673879023136792155e-01 +nan nan nan +7.836240153171841749e-02 9.551975458201636959e-01 -3.709205851612334404e-01 +3.963463938528246633e-01 9.216821264468080255e-01 -2.121308677695179223e-01 +6.791484987191613865e-01 8.454827904725980003e-01 -3.694459310306154598e-01 +4.941709105965628179e-01 7.875489297948203848e-01 -6.691562404136027764e-01 +7.853683914315255921e-01 8.149957820616341442e-01 -8.201114643745612831e-01 +7.634155701138091166e-01 5.041825586755288580e-01 -7.593433083320866306e-01 +1.088870655913965146e+00 4.931687772972990214e-01 -6.706671136206555639e-01 +1.014698382077789951e+00 6.123642017202370891e-01 -4.166243062495459393e-01 +8.371690807919530686e-01 2.534328774292874398e-01 -4.804218893737024487e-01 +5.686412769859956473e-01 3.675115144892258656e-01 -3.829903016845994657e-01 +nan nan nan +2.166154725881668519e-01 4.873513558237743126e-01 -3.660900487009551840e-01 +9.447639593424254911e-02 5.329476488362507869e-01 -6.468605160355610506e-01 +3.579703675639662741e-01 3.687958931595007317e-01 -7.834056112614344869e-01 +2.433452416405921248e-01 5.159570837779504648e-01 -1.050658890450052008e+00 +5.431921712937985980e-01 5.843930351315252958e-01 -1.210631163429683577e+00 +5.218836716846613211e-01 2.862363699466944356e-01 -1.281202930493817949e+00 +8.228656288773396055e-01 2.503690108273990966e-01 -1.134674027059218160e+00 +7.043190707003631479e-01 5.638371631839592807e-02 -9.058855892607197147e-01 +9.446841320927877872e-01 -1.471282466356157448e-01 -1.080790562308724168e+00 +6.773414990126980229e-01 -2.772933171600339142e-01 -1.186275279913743708e+00 +4.129703852319040469e-01 -1.690333632739099146e-01 -1.332780032155364669e+00 +9.294682656840015655e-02 -1.998909384133854550e-01 -1.423434731071590909e+00 +6.156764980414417693e-02 1.118256464081741897e-01 -1.345799386479106907e+00 +-2.122144473596023762e-01 2.249516864568038843e-01 -1.175489057453802388e+00 +-9.253052661962062053e-02 2.003397198300642945e-01 -8.658629102373861519e-01 +-2.715191621259340979e-02 -9.358531472650813421e-02 -9.558706251932916897e-01 +2.683660942990637177e-01 -3.000875034371227289e-03 -9.383744311393146242e-01 +3.834888302912287661e-01 -2.908325976363503318e-01 -7.974540934601989095e-01 +4.264528931995116845e-01 -4.005487024124117917e-02 -5.782644317168018633e-01 +1.271699681755761591e-01 -1.138930274012477095e-01 -5.470972820073831677e-01 +1.958654393058998910e-01 5.510232126434282812e-02 -2.704705311473612483e-01 +-5.350731134832528713e-02 -1.199286679319162552e-01 -2.105972240110838556e-01 +nan nan nan +-1.717640254579718417e-01 2.084341865708368158e-01 -4.144753886906301643e-01 +nan nan nan +nan nan nan +-3.051933253093711551e-01 -1.784761957647359176e-01 -5.430597121618875311e-01 +-4.822878259446701232e-01 6.837288486449338842e-02 -7.012363149996738709e-01 +-7.858040095230728328e-01 2.141742579384716891e-02 -8.473097802234881293e-01 +-6.852042927314366816e-01 -2.788657302179818331e-01 -7.059835692748092084e-01 +-6.854626679526764077e-01 -4.099289385868795099e-01 -1.000169401790615975e+00 +-4.739067487499349363e-01 -1.418439804288048611e-01 -1.156705290911309136e+00 +-3.210503100902761253e-01 -4.197980109588743125e-01 -1.231308350770285553e+00 +-2.366012725428556784e-01 -4.391845080532152945e-01 -9.086457126677267571e-01 +8.955886838636234470e-02 -5.100969445392529167e-01 -1.014724224053531465e+00 +3.517815295035203182e-01 -7.191600089896331705e-01 -1.113967348092800513e+00 +2.029747875401079760e-01 -9.894113622963457422e-01 -1.176803513436833315e+00 +-7.835257521638246125e-02 -8.838067052490493625e-01 -1.214617388716514590e+00 +-1.797650044068916098e-01 -8.687364172906295590e-01 -8.839910614673448519e-01 +-4.824776364276942009e-01 -9.400766412684258277e-01 -9.222720726953358428e-01 +-5.676036478790438267e-01 -7.817570409029525047e-01 -6.498130389969506426e-01 +-3.348668791709845660e-01 -6.194988202958666701e-01 -5.012486953618828789e-01 +-9.010562964319768220e-02 -4.911060640306622038e-01 -2.763715789807102974e-01 +4.171692149192157001e-02 -5.779471181435302807e-01 -5.481041940154097736e-01 +nan nan nan +2.369695123853784446e-01 -8.690206841785217406e-01 -6.788118040262202113e-01 +1.314890103169292779e-01 -1.205394763211687481e+00 -7.221101790510722207e-01 +-1.220060570379650489e-01 -1.321771134821741800e+00 -5.764619598225674268e-01 +-1.443651247250369585e-01 -1.034049923744974064e+00 -3.843254266645260153e-01 +1.233732109346734468e-01 -8.722728299919976225e-01 -2.136558112289961009e-01 +2.518592896068500564e-01 -1.205730494484798410e+00 -2.310805749582227897e-01 +3.050320237417123992e-01 -1.516537260980471258e+00 -3.471784858403771379e-01 +5.576892208352366431e-01 -1.431066822280523176e+00 -7.767051866454453835e-02 +6.962910759197113997e-01 -1.348839361112674995e+00 -3.741437088233858232e-01 +5.890239471971224772e-01 -1.187705790302519215e+00 -6.483850654156128046e-01 +5.735743832431168254e-01 -9.661906622490274987e-01 -4.237097022944595337e-01 +6.218777058162627824e-01 -9.875042607112054505e-01 -4.968605173957219706e-02 +5.044024625454166522e-01 -6.828782876931360768e-01 -8.977671002206547013e-02 +2.861391166284574794e-01 -4.412817536190807299e-01 -1.384003615385486496e-01 +nan nan nan +4.847175101336875236e-01 -5.641410922598228961e-01 -4.823802090441500767e-01 +6.221765877422954505e-01 -3.269728645086193430e-01 -2.888905575135573556e-01 +8.156956024865754662e-01 -3.295493067828518252e-01 -5.678481595345192812e-01 +1.122732512807095517e+00 -2.270051240610777843e-01 -6.433398035084625377e-01 +1.147060472908624895e+00 -5.676842348488636558e-01 -6.466221596670258531e-01 +8.514937139635520014e-01 -7.489878641172792451e-01 -7.305951944292202738e-01 +1.062818323418291078e+00 -1.013978697827732978e+00 -6.553971359897979854e-01 +1.104623482283000779e+00 -1.023371242772463274e+00 -3.561033363512152716e-01 +9.711696168954897512e-01 -7.318070792975313044e-01 -2.323171644585257101e-01 +9.813858058868296652e-01 -4.470745435010028968e-01 -1.183708436270305420e-01 +1.068573907934461387e+00 -1.550909097736540010e-01 -2.696663663452444903e-01 +1.426552527919961566e+00 -1.140416898108809335e-01 -3.402094222307089355e-01 +1.408353449360038923e+00 -3.858410213941106393e-01 -1.983075498015094973e-01 +1.420391134399937805e+00 -5.013273669819295897e-01 1.618160146605258642e-01 +1.401098297753336164e+00 -7.810443392556263298e-01 -5.747328427037776133e-02 +1.316125083028269804e+00 -8.829314883459243868e-01 2.339499837841286511e-01 +1.006658091873944949e+00 -1.034875782830034785e+00 2.163352631434169171e-01 +9.788203696264069498e-01 -9.235241871949283476e-01 5.220347638111485500e-01 +1.093610789526577420e+00 -6.339983730261437023e-01 5.713703507157523731e-01 +1.011540969133249313e+00 -4.977053094112103282e-01 2.950471553045126427e-01 +7.039217800996737662e-01 -6.029126993144995339e-01 3.009574535559815800e-01 +nan nan nan +5.589864317078900857e-01 -2.794210043856446002e-01 1.659723795187970641e-01 +8.370631222650467551e-01 -1.014825492432120252e-01 6.701989076129437339e-02 +6.051193657271944915e-01 1.022616263304504218e-02 -6.528227534651119546e-02 +8.539115725258028533e-01 2.529731602874693563e-01 -1.565809499119778445e-02 +1.180615021917067464e+00 1.824393504114000808e-01 7.679817016167047976e-03 +1.272832892425945328e+00 -5.234064978243477273e-02 2.453048054528508826e-01 +1.381901303777254508e+00 -2.340555209689618443e-01 5.070373842599489311e-01 +1.230017036953627185e+00 8.883171004011986005e-02 6.057667203283733093e-01 +1.066468650088769898e+00 -1.742578420615808754e-01 7.735494376108464909e-01 +8.417305994128969626e-01 -1.406031277041471239e-01 5.149142951002999569e-01 +8.169625984829133047e-01 1.713349795291659594e-01 3.961811475784611236e-01 +1.027259763379416713e+00 4.768717844383635884e-01 3.344309175434218018e-01 +1.240032801392702888e+00 5.129331732753398265e-01 5.181876679887500448e-01 +nan nan nan +4.470453899035121692e-01 2.874651742335507798e-01 1.911855373132846370e-01 +6.714064743514046585e-01 5.845780607827877340e-01 1.654706674823067936e-01 +4.737998533049546657e-01 6.177336805213953230e-01 4.265729802320671893e-01 +6.963904680533711256e-01 5.671585390367395174e-01 6.358344996964473417e-01 +8.956029799092506405e-01 4.001578959489827114e-01 8.809156723986055759e-01 +5.783332820216433579e-01 4.850859612856956993e-01 1.000459471646994558e+00 +6.169212688837937275e-01 1.464356848728274596e-01 8.058300811624571347e-01 +7.016226658294402174e-01 -3.380132414547894204e-02 1.085138429481495859e+00 +5.779932453875733511e-01 1.876402385189445687e-01 1.290552663685355927e+00 +2.984255989250166885e-01 1.531506089076991317e-01 1.147036368740441903e+00 +1.497779137423454199e-01 5.007467956070273812e-02 8.660790365541543023e-01 +2.476477486142401041e-01 3.137223646110602338e-01 6.207415911252696095e-01 +4.299063300109613739e-02 1.163432976764629256e-01 4.701676716528933042e-01 +3.519780432516223523e-01 -5.749905603165897383e-02 4.897999399701523093e-01 +nan nan nan +1.944131339754435472e-01 -2.714177541221509879e-02 1.355836850667628268e-01 +-4.371754530003101741e-02 -1.632613595779188742e-01 2.031556560359991104e-01 +-5.062095001049927295e-02 -2.455706045592424003e-01 5.222075869589942743e-01 +-1.057203376858840693e-01 -5.306998288510769157e-01 5.861488742706351118e-01 +1.958050903058800585e-01 -4.016033296120217755e-01 2.743892215095917186e-01 +2.771176673334195817e-01 -5.210498566155239430e-01 5.647517715009301442e-01 +5.633679060453512522e-01 -4.660219270257825452e-01 7.000249370891100664e-01 +3.903099841805904857e-01 -3.083078767804508513e-01 9.006697718705142242e-01 +1.053111228023603657e-01 -4.266151588515720849e-01 9.620075591581126462e-01 +-5.203656161993796929e-03 -3.575439623192072647e-01 1.265929977186668998e+00 +2.968247029710832097e-01 -3.234270768026045739e-01 1.346631207847926781e+00 +5.178873274785267267e-01 -5.595481178126843202e-01 1.288346474096550009e+00 +7.891756092401908118e-01 -5.784975184832148454e-01 1.098150831598366350e+00 +8.210315957477052340e-01 -8.726679081398788229e-01 9.871866964532907129e-01 +4.997582813481048492e-01 -8.771304024993651360e-01 9.117700467307821022e-01 +4.162008260150046324e-01 -1.057724223627203752e+00 1.148091664814217872e+00 +1.524208660827060524e-01 -8.572007290169206817e-01 1.213569273947369132e+00 +-1.867583487174073187e-01 -8.569550444840445813e-01 1.197106891197321499e+00 +-1.730762656251818576e-01 -7.705489696964903379e-01 8.722525455549817730e-01 +9.145100218204237075e-02 -9.100032191482368971e-01 7.317289104794325594e-01 +-2.487885433513266700e-02 -1.188617953282994133e+00 8.979811928385093012e-01 +-1.460443649969960234e-01 -1.271977781353296422e+00 5.887649440843016890e-01 +1.255455277025006866e-01 -1.506621709901287076e+00 5.571093417921092472e-01 +3.774480324803952680e-01 -1.362465375218854469e+00 7.114056524848203500e-01 +5.582048773576981926e-01 -1.350383434468751753e+00 4.455510556370712028e-01 +4.865199309690939056e-01 -1.013105657390005021e+00 4.828237173173027807e-01 +2.377841228932420503e-01 -1.128880792502682917e+00 2.996369380624652190e-01 +2.626482863334042572e-01 -7.856759789994788301e-01 2.318795308246967557e-01 +-3.500345289129505599e-02 -6.101262495934280938e-01 1.206601251158037069e-01 +-5.620231644240349100e-02 -8.179843486036838529e-01 3.350423394682607858e-01 +-1.548800717078266664e-01 -1.052285054622917659e+00 1.152304408517667023e-01 +-9.353253714031212196e-02 -1.399774481343075916e+00 4.711092144799020054e-02 +-4.215330018215492358e-01 -1.412437441808512828e+00 1.537693368459169263e-01 +-4.874600319822626759e-01 -1.315464553539826831e+00 -1.625097848265423528e-01 +-7.514280820100386338e-01 -1.138340376309955104e+00 -2.912590417958847988e-01 +-9.969359393378061096e-01 -1.008763483143432449e+00 -1.523542061126644809e-01 +-9.235951512721664036e-01 -1.194744167877616725e+00 1.102447370097605905e-01 +-9.515313752970504524e-01 -1.035626378287977101e+00 4.090447063021520613e-01 +-6.935490243020155932e-01 -1.240797153470885705e+00 5.040705566428520523e-01 +-6.275994071780870431e-01 -1.010597675480934932e+00 7.242057171857051445e-01 +-4.518624528392313211e-01 -8.518921462814741385e-01 5.219912109519252441e-01 +-5.524564660172180952e-01 -9.248330787895955307e-01 2.036871347939182286e-01 +-5.632989314378019641e-01 -7.686044899594768331e-01 -9.774454522573777782e-02 +-3.028428515513629238e-01 -6.795276704743148377e-01 -6.647095684038027696e-02 +nan nan nan +-3.698639360741801663e-01 -3.025228148404977002e-01 -9.157690672016485442e-03 +-4.191067259131203704e-01 -4.816266781332541602e-01 2.345984730826356501e-01 +-7.015236229970557291e-01 -5.621859858810904775e-01 3.598179290351362636e-01 +-9.095814924824425773e-01 -6.034095364403038175e-01 1.182083426238599300e-01 +-1.244319473608213933e+00 -6.065508218015334130e-01 1.525342269750807056e-01 +-1.263515608495618503e+00 -5.539719600206717276e-01 -1.467590219219503112e-01 +nan nan nan +nan nan nan +nan nan nan +nan nan nan +nan nan nan +nan nan nan +nan nan nan +-1.097561870202095502e+00 -5.683569589692082369e-01 -4.282566702468462161e-01 +-1.131524117340728219e+00 -2.955307211489663910e-01 -4.154176807526342996e-01 +-7.520402409254568488e-01 -3.432840656773195054e-01 -2.324075291635147311e-01 +-7.842159381590328993e-01 -1.635954370307089734e-01 3.664714671179648831e-02 +-1.088603755997759936e+00 -1.494634027354803107e-01 2.765165356902639805e-03 +nan nan nan +-4.945146193325936079e-01 -1.261188903960488716e-01 3.977303697009692551e-01 +-8.141804129658967160e-01 -1.886064890946885786e-01 4.563096226093061314e-01 +-1.139625385764869492e+00 -1.416292958078788633e-01 4.538929634414421987e-01 +-1.298545546840625287e+00 -2.920107137337160652e-01 6.710749759156112715e-01 +-1.072905993499551958e+00 -5.189113423410035564e-01 6.711359809002329646e-01 +-9.481635648178090570e-01 -3.029921055966779408e-01 9.205086988827888561e-01 +-7.678305081576214386e-01 -5.891854924731272369e-01 1.016268448426319804e+00 +-5.618719716982486512e-01 -4.351110492520893214e-01 7.653802281350777781e-01 +-4.536451017364661609e-01 -3.268577047259646484e-01 1.055676667076307362e+00 +-3.580256584576510526e-01 -1.054873915682305041e-01 8.482904786158182997e-01 +-2.999000174660820806e-01 1.730420438964953567e-01 1.014474126971025569e+00 +-3.818362374154547489e-01 6.617638067913741984e-02 1.271899498992468347e+00 +-7.270397063535668503e-01 3.146075492079836861e-02 1.182614357050513876e+00 +-7.598588282278109229e-01 3.405044038885418067e-01 1.039617872168818513e+00 +-9.860527576294855212e-01 1.603621860386197640e-01 8.582650178361209692e-01 +-6.832520772989277846e-01 1.286803028058904896e-01 7.210881971349164532e-01 +-4.200739768186184664e-01 2.560445986471883151e-01 6.382588430988178629e-01 +nan nan nan +-7.108750196982777103e-01 2.238361813457063476e-01 3.460176653951437808e-01 +-6.892913513440611650e-01 5.158201526724275343e-01 4.943425561908250110e-01 +-9.514278264002102858e-01 5.994770973076231080e-01 6.719418180211196301e-01 +-1.133120263234465686e+00 4.042682847952061009e-01 4.774329220684226383e-01 +-1.080391254478414220e+00 6.334073302112639414e-01 2.532623861760497919e-01 +-1.295174949898287187e+00 2.426816083941834845e-01 1.430416611620392298e-01 +-1.010261856112896428e+00 3.442922545566347026e-01 1.566065798732718586e-03 +-1.121163484107032593e+00 3.698854568714510238e-01 -2.798164174612195465e-01 +-9.004549802965163829e-01 1.406204307013127208e-01 -3.235516497474643649e-01 +nan nan nan +-5.255302163547240779e-01 1.702302328846901081e-01 -6.465832379235675331e-02 +-6.230843792829340311e-01 4.323062282043864668e-01 -2.782930287272863401e-01 +-7.548809974981121051e-01 7.433137644419872681e-01 -9.794336064232821160e-02 +-5.641174513992835982e-01 5.868757312517929314e-01 8.334818279747947811e-02 +-2.940654682599184166e-01 5.901336338625066125e-01 -1.211563799655552554e-01 +nan nan nan +-2.836333509526064445e-01 2.365553831248064787e-01 2.403499832163744354e-01 +-1.699880809303447662e-02 3.913639158180796107e-01 7.960500556344769774e-02 +nan nan nan +nan nan nan +-1.859943978460750769e-01 5.794980154760638325e-01 4.193462108221873708e-01 +-3.582367797073889720e-02 7.837824025653368221e-01 1.920181143556747572e-01 +-3.676623534249944814e-01 9.689605080991308128e-01 2.372600503086084445e-01 +-2.771779931852155765e-01 1.282572971922124161e+00 1.640558574595433194e-01 +-5.839785664100772822e-01 1.247382510600196115e+00 1.950257841054864882e-01 +-5.609254564026742162e-01 1.459923930013897753e+00 3.877756883998661475e-01 +-5.712591421415078585e-01 1.225290628897001133e+00 5.319354764891242038e-01 +-2.908885695757383316e-01 1.317775759665825053e+00 5.183066353502291390e-01 +-3.013105439684770070e-01 9.491295567055149096e-01 6.610817834636861789e-01 +-2.030867788779374405e-01 1.189683198348085114e+00 8.871332246020470214e-01 +1.793698845264660516e-02 1.346429272274516142e+00 7.862892913556961405e-01 +6.180134960458741333e-02 1.072329281541505752e+00 5.783166516192344009e-01 +3.179195017614629970e-01 1.060321157635158551e+00 7.328139629919500075e-01 +1.936494258884669806e-01 1.043344032518609854e+00 9.991083236522118050e-01 +nan nan nan +6.083152893821088675e-02 7.305229790735230377e-01 8.085293492795333492e-01 +-7.505458339714313198e-02 1.004442096659363237e+00 1.214270905379082555e+00 +1.120629487497434712e-01 7.241154891849814756e-01 1.169877422197319383e+00 +-1.705443630203752337e-01 6.785768657088427913e-01 1.223655997561346132e+00 +-2.670935589933097765e-01 6.291427476940076868e-01 9.347026931157295104e-01 +nan nan nan +-5.433235132361402542e-01 8.427299952323797916e-01 9.834790127789668768e-01 +-5.083666522305103630e-01 1.016185031312059950e+00 1.217147804143044576e+00 +-5.463517615564236207e-01 8.095324695728174369e-01 1.357111060841335926e+00 diff --git a/examples/test_pastis_to_pdb/test_script/invalid.bed b/examples/test_pastis_to_pdb/test_script/invalid.bed new file mode 100644 index 00000000..d41fee7b --- /dev/null +++ b/examples/test_pastis_to_pdb/test_script/invalid.bed @@ -0,0 +1,2 @@ +invalid bed file + diff --git a/pastis/optimization/pastis_to_pdb.py b/pastis/optimization/pastis_to_pdb.py new file mode 100644 index 00000000..3e408baf --- /dev/null +++ b/pastis/optimization/pastis_to_pdb.py @@ -0,0 +1,511 @@ +from __future__ import print_function +from scipy import linalg +from sklearn.metrics import euclidean_distances +import numpy as np + +# 160907 GBonora +# Python implementation of 'print_pdb_atom' function in coords.cpp + + +def fprintf(fp, fmt, *args): + """ + Simply an alias for fprintf to allow Python implementation to remain + similar to original C version. + Parameters + ---------- + fp : output file + fmt : C-style format string + args: arguments for format string + """ + fp.write(fmt % args) + + +def print_pdb_atom(outfile, + # chrom_index, + # copy_index, + atom_index, + is_node, + atom_name, + scale_factor, + my_coords): + """ + Prints one line to PDB file. + Parameters + ---------- + outfile : output file + atom_index : integer + 'atom' ID + is_node : boolean + Is this a node or an edge atom? + atom_name : string + eg "N", "O", or "C" + scale_factor : integer + 100 for a sphere with radius 1, 1000 for a sphere with radius 10 + my_coords : + 3D co-ordinates to ouput + """ + # prev_chrom_index = -1 + # atom_index + # if chrom_index != prev_chrom_index: + # atom_index = 1 + # prev_chrom_index = chrom_index + + # http://www.biochem.ucl.ac.uk/~roman/procheck/manual/manappb.html + fprintf(outfile, "ATOM ") # 1- 6: Record ID + fprintf(outfile, "%5d", atom_index) # 7-11: Atom serial number + fprintf(outfile, " ") # 12: Blank + # if (is_node) { # 13-16: Atom name + # fprintf(outfile, "N ") + # } else { + # fprintf(outfile, "O ") + # } + fprintf(outfile, "%s ", atom_name) + fprintf(outfile, " ") # 17-17: Alternative location code + if is_node: # 18-20: 3-letter amino acid code + fprintf(outfile, "NOD") + else: + fprintf(outfile, "EDG") + fprintf(outfile, " ") # 21: Blank + fprintf(outfile, "%c", # 22: Chain identifier code + # get_chrom_id(chrom_index, copy_index)) + 'A') + fprintf(outfile, " ") # 23-26: Residue sequence number + fprintf(outfile, " ") # 27: Insertion code + fprintf(outfile, " ") # 28-30: Blank + fprintf(outfile, "%8.3f%8.3f%8.3f", # 31-54: Atom coordinates + # (my_coords->x + 1.0) * SCALE_FACTOR, + # (my_coords->y + 1.0) * SCALE_FACTOR, + # (my_coords->z + 1.0) * SCALE_FACTOR) + (my_coords[0] + 1.0) * scale_factor, + (my_coords[1] + 1.0) * scale_factor, + (my_coords[2] + 1.0) * scale_factor) + fprintf(outfile, "%6.2f", 1.0) # 55-60: Occupancy value + if is_node: + fprintf(outfile, "%6.2f", 50.0) # 61-66: B-value (thermal factor) + else: + fprintf(outfile, "%6.2f", 75.0) # 61-66: B-value (thermal factor) + fprintf(outfile, " ") # 67: Blank + fprintf(outfile, " ") # 68-70: Blank + fprintf(outfile, "\n") + +def writePDB(Xpdb, bead_truths, pdbfilename): + """ + Write 3D ouput from pastis to PDB file. + Parameters + ---------- + Xpdb : nparray + the 3D co-ordinates + pdbfilename : File to write PDB file to. + """ + # print('PDB file creation!') + atom_name = 'O' + scale_factor = 100 # 100 for a sphere with radius 1, 1000 for a sphere with radius 10 + # pdboutfile = open(pdbfilename,'w') + with open(pdbfilename, "w") as pdboutfile: + for coordIdx in range(0, np.shape(Xpdb)[0]): + # print coordIdx + if coordIdx == 0 or coordIdx == np.shape(Xpdb)[0]: + is_node = True + else: + is_node = False + my_coords = Xpdb[coordIdx, :] + if (bead_truths != None): + if not bead_truths[coordIdx]: + atom_name = 'H' + else: + atom_name = 'O' + print_pdb_atom(pdboutfile, + # chrom_index, + # copy_index, + coordIdx, + is_node, # Is this a node or an edge atom + atom_name, # eg "N", "O", "H", or "C", + scale_factor, + my_coords) + # pdboutfile.close() + +def realignment_error(X, Y, error_type): + """ + If an error occurs during realignment, processes it. + """ + if error_type.lower() == 'rmsd': + return np.sqrt(((X - Y) ** 2.).sum() / len(X)) + elif error_type.lower() == 'distanceerror': + return np.sqrt(( + (euclidean_distances(X) - euclidean_distances(Y)) ** 2.).sum()) + else: + raise ValueError('Error error_type must be rmsd or distanceerror') + + +def realign_structures(X, Y, rescale=False, copy=True, verbose=False, + error_type='rmsd'): + """ + Realigns Y and X + + Parameters + ---------- + X : ndarray (n, 3) + First 3D structure + + Y : ndarray (n, 3) + Second 3D structure + + rescale : boolean, optional, default: False + Whether to rescale Y or not. + + copy : boolean, optional, default: True + Whether to copy the data or not + + verbose : boolean, optional, default: False + The level of verbosity + + Returns + ------- + Y : ndarray (n, 3) + Realigned 3D, Xstructure + """ + if copy: + Y = Y.copy() + X = X.copy() + + mask = np.invert(np.isnan(X[:, 0]) | np.isnan(Y[:, 0])) + + if rescale: + Y, _, _, _ = realign_structures(X, Y) + if error_type.lower() == 'rmsd': + alpha = (X[mask] * Y[mask]).sum() / (Y[mask] ** 2).sum() + elif error_type.lower() == 'distanceerror': + dis_X = euclidean_distances(X[mask]) + dis_Y = euclidean_distances(Y[mask]) + alpha = (dis_X * dis_Y).sum() / (dis_Y ** 2).sum() + + Y *= alpha + + X -= np.nanmean(X, axis=0) + Y -= np.nanmean(Y, axis=0) + + K = np.dot(X[mask].T, Y[mask]) + U, L, V = linalg.svd(K) + V = V.T + + R = np.dot(V, U.T) + if linalg.det(R) < 0: + if verbose: + print("Reflexion found") + V[:, -1] *= -1 + R = np.dot(V, U.T) + Y_fit = np.dot(Y, R) + + error = realignment_error(X[mask], Y_fit[mask], error_type) + + # Check at the mirror + Y_mirror = Y.copy() + Y_mirror[:, 0] = - Y[:, 0] + + K = np.dot(X[mask].T, Y_mirror[mask]) + U, L, V = linalg.svd(K) + V = V.T + if linalg.det(V) < 0: + V[:, -1] *= -1 + + R_mirror = np.dot(V, U.T) + Y_mirror_fit = np.dot(Y_mirror, R_mirror) + error_mirror = realignment_error(X[mask], Y_mirror_fit[mask], error_type) + + if error <= error_mirror: + best_Y_fit = Y_fit + best_error = error + mirror = False + best_R = R + else: + if verbose: + print("Reflexion is better") + best_Y_fit = Y_mirror_fit + best_error = error_mirror + mirror = True + best_R = R_mirror + + return best_Y_fit, best_error, mirror, best_R + + +def find_rotation(X, Y, copy=True): + """ + Finds the rotation matrice C such that \|x - Q.T Y\| is minimum. + + Parameters + ---------- + X : ndarray (n, 3) + First 3D structure + + Y : ndarray (n, 3) + Second 3D structure + + copy : boolean, optional, default: True + Whether to copy the data or not + + Returns + ------- + Y : ndarray (n, 3) + Realigned 3D structure + """ + if copy: + Y = Y.copy() + X = X.copy() + mask = np.invert(np.isnan(X[:, 0]) | np.isnan(Y[:, 0])) + K = np.dot(X[mask].T, Y[mask]) + U, L, V = np.linalg.svd(K) + V = V.T + + t = np.array([[1, 0, 0], + [0, 1, 0], + [0, 0, np.linalg.det(np.dot(V, U.T))]]) + R = np.dot(V, np.dot(t, U.T)) + Y_fit = np.dot(Y, R) + X_mean = X[mask].mean() + Y_fit -= Y_fit[mask].mean() - X_mean + + # Check at the mirror + Y_mirror = Y.copy() + Y_mirror[:, 0] = - Y[:, 0] + + K = np.dot(X[mask].T, Y_mirror[mask]) + U, L, V = np.linalg.svd(K) + V = V.T + + t = np.array([[1, 0, 0], + [0, 1, 0], + [0, 0, np.linalg.det(np.dot(V, U.T))]]) + R_ = np.dot(V, np.dot(t, U.T)) + Y_mirror_fit = np.dot(Y_mirror, R_) + Y_mirror_fit -= Y_mirror[mask].mean() - X_mean + error_mirror = ((X[mask] - Y_mirror_fit[mask]) ** 2).sum() + return R + + +def distance_between_structures(X, Y): + """ + Computes the distances per loci between structures + + Parameters + ---------- + X : ndarray (n, l) + First 3D structure + + Y : ndarray (n, l) + Second 3D structure + + Returns + ------- + distances : (n, ) + Distances between the 2 structures + """ + if X.shape != Y.shape: + raise ValueError("Shapes of the two matrics need to be the same") + + return np.sqrt(((X - Y) ** 2).sum(axis=1)) + + +def _round_struct_for_pdb(struct, max_num_char=8): + """ + Rounds the structure for PDB + """ + struct = np.asarray(struct) + struct_abs = np.where( + np.isfinite(struct) & (struct != 0), + np.abs(struct), 10 ** (max_num_char - 1)) + round_factor = 10 ** (max_num_char - 1 - np.floor(np.log10(struct_abs))) + round_factor = np.where( + np.floor(np.log10(struct_abs)) < max_num_char, + round_factor / 10, round_factor) + return np.round(struct * round_factor) / round_factor + + +def realign_struct_for_plotting(struct_to_plot, struct_to_match, rescale=False): + """ + Realigns the structure for plotting + """ + + from rmsd import kabsch + struct_to_plot = struct_to_plot.copy() + struct_to_match = struct_to_match.copy() + struct_to_plot -= np.nanmean(struct_to_plot, axis=0) + struct_to_match -= np.nanmean(struct_to_match, axis=0) + mask = np.invert( + np.isnan(struct_to_plot[:, 0]) | np.isnan(struct_to_match[:, 0])) + struct_to_plot, _, _, _ = realign_structures( + struct_to_match, struct_to_plot, rescale=rescale, error_type='rmsd') + struct_to_plot[mask] = np.dot( + struct_to_plot[mask], kabsch( + struct_to_plot[mask], struct_to_match[mask])) + return struct_to_plot + +def interpolate_chromosomes(X, lengths, eps=1e-1, smooth=0): + from scipy import interpolate + from scipy.sparse import issparse + """ + Return a smoothed interpolation of the chromosomes + + Parameters + ---------- + X : ndarray (n, 3) + The 3D structure + + lengths : ndarray (L, ) + The lengths of the chromosomes. Note that the sum of the lengths + should correspond to the length of the ndarray of the 3D structure. + + Returns + ------- + smoothed_X : the smoothed 3D structure, with a set of coordinates for + each chromosome (ie, the first chromosome's points will be + smoothed_X[0]). smoothed_X[0][i] will be a list with an x, y, and + z coordinate. + + """ + + smoothed_X = [[],[]] + smoothed_beginning = [] + smoothed_end = [] + + mask = np.invert(np.isnan(X[:, 0])) + + begin, end = 0, 0 + + enumerated = enumerate(lengths) + + for i, length in enumerated: + end += length + x = X[begin:end, 0] + y = X[begin:end, 1] + z = X[begin:end, 2] + indx = mask[begin:end] + + if not len(x): + break + + if not indx[0]: + x[0] = x[indx][0] + x[indx][0] = np.nan + y[0] = y[indx][0] + y[indx][0] = np.nan + + z[0] = z[indx][0] + z[indx][0] = np.nan + + if not indx[-1]: + x[-1] = x[indx][-1] + x[indx][-1] = np.nan + y[-1] = y[indx][-1] + z[indx][-1] = np.nan + z[-1] = z[indx][-1] + z[indx][-1] = np.nan + + indx = np.invert(np.isnan(x)) + + m = np.arange(len(x))[indx] + + f_x = interpolate.Rbf(m, x[indx], smooth=smooth) + f_y = interpolate.Rbf(m, y[indx], smooth=smooth) + f_z = interpolate.Rbf(m, z[indx], smooth=smooth) + + for j in range(length - 1): + if (j < length - 2): + m = np.arange(j, j + 1 + 0.1, 0.1) + else: + m = np.arange(j, j + 1, 0.1) + smoothed_X[i].append([f_x(np.arange(m.min(), m.max(), 0.1)), + f_y(np.arange(m.min(), m.max(), 0.1)), + f_z(np.arange(m.min(), m.max(), 0.1))]) + + begin = end + + return smoothed_X + +def reshape_interpolate(interpolated_x): + new_structure = [] + for i in range(len(interpolated_x)): + curr_row = interpolated_x[i] + xs, ys, zs = curr_row[0], curr_row[1], curr_row[2] + for j in range(len(xs)): + new_structure.append([xs[j], ys[j], zs[j]]) + return np.array(new_structure) + +def combine_structs(X_beads, X_interp): + result = [] + result_truth = [] + for i in range(len(X_beads) - 1): + bead = X_beads[i] + curr_interp = X_interp[i] + xs, ys, zs = curr_interp[0], curr_interp[1], curr_interp[2] + result.append(bead) + result_truth.append(True) + for j in range(len(xs)): + result.append([xs[j], ys[j], zs[j]]) + result_truth.append(False) + result.append(X_beads[-1]) + result_truth.append(True) + return result, result_truth + +def main(): + # Parse the arguments + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("--input_file", type=str, required=True, + help="input file name, should be the structure output by running PASTIS") + parser.add_argument('--output_file', type=str, required=True, + help='output file name, ie ') + parser.add_argument('--interpolate_coords', type=bool, default=True, required=False, + help='if true, interpolates the points between the coordinate beads as hydrogen') + args = parser.parse_args() + + # Load the structure + struct_coords = np.loadtxt(args.input_file, delimiter=" ") + + # Get the length of the structure + struct_length = len(struct_coords) + + struct_interpolated = None + if (args.interpolate_coords): + struct_interpolated = interpolate_chromosomes(struct_coords, [struct_length])[0] + struct_coords, struct_truths = combine_structs(struct_coords, struct_interpolated) + + # Check the length + if (len(struct_coords) > 10000): + raise ValueError('structure can be at most 10000 beads') + + # Realign the structure + struct_realigned = realign_struct_for_plotting(struct_to_plot=struct_coords, + struct_to_match=struct_coords) + + # Round the structure + struct_round = _round_struct_for_pdb(struct_realigned) + + # Write the structure to a PDB file + writePDB(struct_round, struct_truths, args.output_file) + + # Load the PDB file again + pdb_file = open(args.output_file, 'r').read().split('\n') + + # Fine which lines are nan + bad_lines = [] + for i in range(len(pdb_file)): + tokens = pdb_file[i].split() + true_line = [] + for j in range(len(tokens)): + if ((j >= 5) & (j <= 7)): + token = tokens[j] + if (str(token) != 'nan'): + true_line.append(token) + else: + true_line.append(tokens[j]) + if len(true_line) != len(tokens): + bad_lines.append(i) + + # Print out the not nan lines + with open(args.output_file, 'w') as pdb_output_file: + for i in range(len(pdb_file)): + if i not in bad_lines: + print(pdb_file[i], file=pdb_output_file) + +if __name__ == "__main__": + main() diff --git a/pastis/script/pastis-to-pdb b/pastis/script/pastis-to-pdb new file mode 100644 index 00000000..bacc638f --- /dev/null +++ b/pastis/script/pastis-to-pdb @@ -0,0 +1,648 @@ +#! /usr/bin/env python + +from __future__ import print_function +import numpy as np +from scipy import linalg +from sklearn.metrics import euclidean_distances + +# 160907 GBonora +# Python implementation of 'print_pdb_atom' function in coords.cpp + + +def fprintf(fp, fmt, *args): + """ + Simply an alias for fprintf to allow Python implementation to remain + similar to original C version. + Parameters + ---------- + fp : output file + fmt : C-style format string + args: arguments for format string + """ + fp.write(fmt % args) + + +def print_pdb_atom(outfile, + # chrom_index, + # copy_index, + atom_index, + is_node, + atom_name, + scale_factor, + my_coords): + """ + Prints one line to PDB file. + Parameters + ---------- + outfile : output file + atom_index : integer + 'atom' ID + is_node : boolean + Is this a node or an edge atom? + atom_name : string + eg "N", "O", or "C" + scale_factor : integer + 100 for a sphere with radius 1, 1000 for a sphere with radius 10 + my_coords : + 3D co-ordinates to ouput + """ + # prev_chrom_index = -1 + # atom_index + # if chrom_index != prev_chrom_index: + # atom_index = 1 + # prev_chrom_index = chrom_index + + # http://www.biochem.ucl.ac.uk/~roman/procheck/manual/manappb.html + fprintf(outfile, "ATOM ") # 1- 6: Record ID + fprintf(outfile, "%5d", atom_index) # 7-11: Atom serial number + fprintf(outfile, " ") # 12: Blank + # if (is_node) { # 13-16: Atom name + # fprintf(outfile, "N ") + # } else { + # fprintf(outfile, "O ") + # } + fprintf(outfile, "%s ", atom_name) + fprintf(outfile, " ") # 17-17: Alternative location code + if is_node: # 18-20: 3-letter amino acid code + fprintf(outfile, "NOD") + else: + fprintf(outfile, "EDG") + fprintf(outfile, " ") # 21: Blank + fprintf(outfile, "%c", # 22: Chain identifier code + # get_chrom_id(chrom_index, copy_index)) + 'A') + fprintf(outfile, " ") # 23-26: Residue sequence number + fprintf(outfile, " ") # 27: Insertion code + fprintf(outfile, " ") # 28-30: Blank + fprintf(outfile, "%8.3f%8.3f%8.3f", # 31-54: Atom coordinates + # (my_coords->x + 1.0) * SCALE_FACTOR, + # (my_coords->y + 1.0) * SCALE_FACTOR, + # (my_coords->z + 1.0) * SCALE_FACTOR) + (my_coords[0] + 1.0) * scale_factor, + (my_coords[1] + 1.0) * scale_factor, + (my_coords[2] + 1.0) * scale_factor) + fprintf(outfile, "%6.2f", 1.0) # 55-60: Occupancy value + if is_node: + fprintf(outfile, "%6.2f", 50.0) # 61-66: B-value (thermal factor) + else: + fprintf(outfile, "%6.2f", 75.0) # 61-66: B-value (thermal factor) + fprintf(outfile, " ") # 67: Blank + fprintf(outfile, " ") # 68-70: Blank + fprintf(outfile, "\n") + + +def writePDB(Xpdb, bead_truths, pdbfilename): + """ + Write 3D ouput from pastis to PDB file. + Parameters + ---------- + Xpdb : nparray + the 3D co-ordinates + bead_truths: boolean list indicating which coordinates are originally + output by PASTIS + pdbfilename : File to write PDB file to. + """ + # print('PDB file creation!') + atom_name = 'O' + scale_factor = 10 + # pdboutfile = open(pdbfilename,'w') + with open(pdbfilename, "w") as pdboutfile: + for coordIdx in range(0, np.shape(Xpdb)[0]): + # print coordIdx + if coordIdx == 0 or coordIdx == np.shape(Xpdb)[0]: + is_node = True + else: + is_node = False + my_coords = Xpdb[coordIdx, :] + if bead_truths is not None: + if not bead_truths[coordIdx]: + atom_name = 'H' + else: + atom_name = 'O' + print_pdb_atom(pdboutfile, + # chrom_index, + # copy_index, + coordIdx, + is_node, # Is this a node or an edge atom + atom_name, # eg "N", "O", or "C", + scale_factor, + my_coords) + # pdboutfile.close() + + +def realignment_error(X, Y, error_type): + """ + If an error occurs during realignment, processes it. + """ + if error_type.lower() == 'rmsd': + return np.sqrt(((X - Y) ** 2.).sum() / len(X)) + elif error_type.lower() == 'distanceerror': + return np.sqrt(( + (euclidean_distances(X) - euclidean_distances(Y)) ** 2.).sum()) + else: + raise ValueError('Error error_type must be rmsd or distanceerror') + + +def realign_structures(X, Y, rescale=False, copy=True, verbose=False, + error_type='rmsd'): + """ + Realigns Y and X + + Parameters + ---------- + X : ndarray (n, 3) + First 3D structure + + Y : ndarray (n, 3) + Second 3D structure + + rescale : boolean, optional, default: False + Whether to rescale Y or not. + + copy : boolean, optional, default: True + Whether to copy the data or not + + verbose : boolean, optional, default: False + The level of verbosity + + Returns + ------- + Y : ndarray (n, 3) + Realigned 3D, Xstructure + """ + if copy: + Y = Y.copy() + X = X.copy() + + mask = np.invert(np.isnan(X[:, 0]) | np.isnan(Y[:, 0])) + + if rescale: + Y, _, _, _ = realign_structures(X, Y) + if error_type.lower() == 'rmsd': + alpha = (X[mask] * Y[mask]).sum() / (Y[mask] ** 2).sum() + elif error_type.lower() == 'distanceerror': + dis_X = euclidean_distances(X[mask]) + dis_Y = euclidean_distances(Y[mask]) + alpha = (dis_X * dis_Y).sum() / (dis_Y ** 2).sum() + + Y *= alpha + + X -= np.nanmean(X, axis=0) + Y -= np.nanmean(Y, axis=0) + + K = np.dot(X[mask].T, Y[mask]) + U, L, V = linalg.svd(K) + V = V.T + + R = np.dot(V, U.T) + if linalg.det(R) < 0: + if verbose: + print("Reflexion found") + V[:, -1] *= -1 + R = np.dot(V, U.T) + Y_fit = np.dot(Y, R) + + error = realignment_error(X[mask], Y_fit[mask], error_type) + + # Check at the mirror + Y_mirror = Y.copy() + Y_mirror[:, 0] = - Y[:, 0] + + K = np.dot(X[mask].T, Y_mirror[mask]) + U, L, V = linalg.svd(K) + V = V.T + if linalg.det(V) < 0: + V[:, -1] *= -1 + + R_mirror = np.dot(V, U.T) + Y_mirror_fit = np.dot(Y_mirror, R_mirror) + error_mirror = realignment_error(X[mask], Y_mirror_fit[mask], error_type) + + if error <= error_mirror: + best_Y_fit = Y_fit + best_error = error + mirror = False + best_R = R + else: + if verbose: + print("Reflexion is better") + best_Y_fit = Y_mirror_fit + best_error = error_mirror + mirror = True + best_R = R_mirror + + return best_Y_fit, best_error, mirror, best_R + + +def find_rotation(X, Y, copy=True): + """ + Finds the rotation matrice C such that |x - Q.T Y| is minimum. + + Parameters + ---------- + X : ndarray (n, 3) + First 3D structure + + Y : ndarray (n, 3) + Second 3D structure + + copy : boolean, optional, default: True + Whether to copy the data or not + + Returns + ------- + Y : ndarray (n, 3) + Realigned 3D structure + """ + if copy: + Y = Y.copy() + X = X.copy() + mask = np.invert(np.isnan(X[:, 0]) | np.isnan(Y[:, 0])) + K = np.dot(X[mask].T, Y[mask]) + U, L, V = np.linalg.svd(K) + V = V.T + + t = np.array([[1, 0, 0], + [0, 1, 0], + [0, 0, np.linalg.det(np.dot(V, U.T))]]) + R = np.dot(V, np.dot(t, U.T)) + Y_fit = np.dot(Y, R) + X_mean = X[mask].mean() + Y_fit -= Y_fit[mask].mean() - X_mean + + # Check at the mirror + Y_mirror = Y.copy() + Y_mirror[:, 0] = - Y[:, 0] + + K = np.dot(X[mask].T, Y_mirror[mask]) + U, L, V = np.linalg.svd(K) + V = V.T + + t = np.array([[1, 0, 0], + [0, 1, 0], + [0, 0, np.linalg.det(np.dot(V, U.T))]]) + R_ = np.dot(V, np.dot(t, U.T)) + Y_mirror_fit = np.dot(Y_mirror, R_) + Y_mirror_fit -= Y_mirror[mask].mean() - X_mean + return R + + +def distance_between_structures(X, Y): + """ + Computes the distances per loci between structures + + Parameters + ---------- + X : ndarray (n, l) + First 3D structure + + Y : ndarray (n, l) + Second 3D structure + + Returns + ------- + distances : (n, ) + Distances between the 2 structures + """ + if X.shape != Y.shape: + raise ValueError("Shapes of the two matrics need to be the same") + + return np.sqrt(((X - Y) ** 2).sum(axis=1)) + + +def _round_struct_for_pdb(struct, max_num_char=8): + """ + Rounds the structure for PDB + """ + struct = np.asarray(struct) + struct_abs = np.where( + np.isfinite(struct) & (struct != 0), + np.abs(struct), 10 ** (max_num_char - 1)) + round_factor = 10 ** (max_num_char - 1 - np.floor(np.log10(struct_abs))) + round_factor = np.where( + np.floor(np.log10(struct_abs)) < max_num_char, + round_factor / 10, round_factor) + return np.round(struct * round_factor) / round_factor + + +def _resize_struct_for_pdb(struct, max_num_char=8): + """ + Resizes the structure for PDB + """ + max_coord_range = (struct.max(axis=0) - struct.min(axis=0)).max() + resize_factor = 10 ** (max_num_char - 1 - np.floor( + np.log10(max_coord_range))) + resized_struct = struct * resize_factor + resized_struct -= resized_struct.min(axis=0) + return resized_struct + + +def realign_struct_for_plotting(struct_to_plot, struct_to_match, + rescale=False): + """ + Realigns the structure for plotting + """ + + from rmsd import kabsch + struct_to_plot = struct_to_plot.copy() + struct_to_match = struct_to_match.copy() + struct_to_plot -= np.nanmean(struct_to_plot, axis=0) + struct_to_match -= np.nanmean(struct_to_match, axis=0) + mask = np.invert( + np.isnan(struct_to_plot[:, 0]) | np.isnan(struct_to_match[:, 0])) + struct_to_plot, _, _, _ = realign_structures( + struct_to_match, struct_to_plot, rescale=rescale, error_type='rmsd') + struct_to_plot[mask] = np.dot( + struct_to_plot[mask], kabsch( + struct_to_plot[mask], struct_to_match[mask])) + return struct_to_plot + + +def interpolate_chromosomes(X, lengths, eps=1e-1, smooth=0): + from scipy import interpolate + """ + Return a smoothed interpolation of the chromosomes + + Parameters + ---------- + X : ndarray (n, 3) + The 3D structure + + lengths : ndarray (L, ) + The lengths of the chromosomes. Note that the sum of the lengths + should correspond to the length of the ndarray of the 3D structure. + + Returns + ------- + smoothed_X : the smoothed 3D structure, with a set of coordinates for + each chromosome (ie, the first chromosome's points will be + smoothed_X[0]). smoothed_X[0][i] will be a list with an x, y, and + z coordinate. + + """ + + smoothed_X = [[], []] + + mask = np.invert(np.isnan(X[:, 0])) + + begin, end = 0, 0 + + enumerated = enumerate(lengths) + + for i, length in enumerated: + end += length + x = X[begin:end, 0] + y = X[begin:end, 1] + z = X[begin:end, 2] + indx = mask[begin:end] + + if not len(x): + break + + if not indx[0]: + x[0] = x[indx][0] + x[indx][0] = np.nan + y[0] = y[indx][0] + y[indx][0] = np.nan + + z[0] = z[indx][0] + z[indx][0] = np.nan + + if not indx[-1]: + x[-1] = x[indx][-1] + x[indx][-1] = np.nan + y[-1] = y[indx][-1] + z[indx][-1] = np.nan + z[-1] = z[indx][-1] + z[indx][-1] = np.nan + + indx = np.invert(np.isnan(x)) + + m = np.arange(len(x))[indx] + + f_x = interpolate.Rbf(m, x[indx], smooth=smooth) + f_y = interpolate.Rbf(m, y[indx], smooth=smooth) + f_z = interpolate.Rbf(m, z[indx], smooth=smooth) + + for j in range(length - 1): + if (j < length - 2): + m = np.arange(j, j + 1 + 0.1, 0.1) + else: + m = np.arange(j, j + 1, 0.1) + smoothed_X[i].append([f_x(np.arange(m.min(), m.max(), 0.1)), + f_y(np.arange(m.min(), m.max(), 0.1)), + f_z(np.arange(m.min(), m.max(), 0.1))]) + + begin = end + + return smoothed_X + + +def combine_structs(X_beads, X_interp, lengths): + """ + Combines X_beads and X_interp + Parameters + ---------- + X_beads: the original coordinate beads of the structure + + X_interp: the interpolated coordinates of X_beads + + Returns + ------- + result: list + a list that is a combination of the coordinates of X_beads and + X_interp, where in between in each coordinate of X_beads, the + corresponding interpolated coordinates from X_interp are + + result_truth: list + a list of size result, where result_truth[i] = True iff + result[i] is a coordinate from X_beads + """ + + result = [] + result_truth = [] + for i in range(len(lengths)): + curr_length = lengths[i] + beadset = X_beads[i * curr_length:i * curr_length + curr_length] + for j in range(curr_length - 1): + bead = beadset[j] + curr_interp = X_interp[i][j] + xs, ys, zs = curr_interp[0], curr_interp[1], curr_interp[2] + result.append(bead) + result_truth.append(True) + for k in range(len(xs)): + result.append([xs[k], ys[k], zs[k]]) + result_truth.append(False) + result.append(beadset[-1]) + result_truth.append(True) + return result, result_truth + + +def main(): + """ + Load all input data and convert the given input file to a PDB file of the + given output file name. If interpolate is 1, interpolates coordinates + between the coordinates in the input file. The raw PASTIS coordinates are + output as oyxgen atoms, while the interpolated coordinates are output as + hydrogen atoms. In the case where we are interpolating a diploid structure + ("interpolate diploid case"), either lengths or a .bed file must be + provided. + + Parameters + ---------- + input_file : str + Name of the input coordinate file; it should be the coordinates of the + 3d structure output by PASTIS. + output_file: str + Name of the output PDB file + interpolate: int + If 1, interpolates between the coordinate beads in the input file. + If 0, does not. + ploidy: int + If 1, considers the structure to be haploid. + If 2, considers the structure to be diploid. + lengths: int int + Length of each homolog. Only used in the interpolate diploid case, in + which case either --lengths or --bed_file, but not both, must be + provided. Format: length1 length2 + bed_file: str + The .bed file that was input to PASTIS to generate the coordinates in + --input_file. Used here to find the length of each homolog only in the + interpolate diploid case, in which case either --lengths or --bed_file, + but not both, must be provided. + """ + + # Parse the arguments + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("--input_file", type=str, required=True, + help="input file name, should be the structure " + + "output by running PASTIS") + parser.add_argument('--output_file', type=str, required=True, + help='output file name, ie ') + parser.add_argument('--interpolate', type=int, required=True, + help="if 1, interpolates between coordinate beads. " + + "if 0, does not.") + parser.add_argument('--ploidy', type=int, required=True, + help='1 for haploid, 2 for diploid') + parser.add_argument('--lengths', type=int, nargs="+", required=False, + default=None, + help="length of each homolog. only used in the " + + "interpolate diploid case, in which case either " + "--lengths or --bed_file, but not both, must " + + "be provided. format: length1 length2") + parser.add_argument('--bed_file', type=str, required=False, default=None, + help=".bed file input to PASTIS to generate the " + + "coordinates in --input_file. used here to " + + "find the length of each homolog only in the " + + "interpolate diploid case, in which case " + + "either --lengths or --bed_file, but not " + + "both, must be provided.") + args = parser.parse_args() + + # Load the structure + struct_coords = np.loadtxt(args.input_file, delimiter=" ") + + # Check the length + if (len(struct_coords) > 10000): + raise ValueError('structure can be at most 10000 beads') + + # Make sure valid ploidy value + if (args.ploidy != 1) and (args.ploidy != 2): + raise ValueError('--ploidy must be 1 (haploid) or 2 (diploid)') + + # Check if we are interpolating + if args.interpolate == 1: + # Check if diploid or haploid case + if args.ploidy == 2: + # Diploid + + # Make sure only one of --lengths or --bed_file was provided + if (args.lengths is not None) and (args.bed_file is not None): + raise ValueError('provided both --lengths or --bed_file ' + + '(one, but not both, must be provided ' + + 'in the interpolate diploid case)') + + # --lengths case + lengths = args.lengths + if (lengths is not None): + if (len(lengths) != 2): + # Not correct number of lengths provided in --lengths + raise ValueError('--lengths requires 2 lengths to be ' + + 'provided') + + # Check to make sure the values in --lengths are okay + elif (lengths[0] + lengths[1] != len(struct_coords)): + raise ValueError('invalid lengths values provided ' + + 'through --lengths') + + # --bed_file case + elif (args.bed_file is not None): + # Get the lengths using the bed file + bed_file = open(args.bed_file).read().splitlines() + homolog_len = len(bed_file) + lengths = [homolog_len, homolog_len] + + # Check to make sure the lengths are okay + if (lengths[0] + lengths[1] != len(struct_coords)): + raise ValueError('invalid lengths values provided ' + + 'through --bed_file') + + else: + # The user did not supply --lengths or --bed_file + raise ValueError('did not provide --lengths or --bed_file ' + + 'in interpolate diploid case') + else: + # Haploid + lengths = [len(struct_coords)] + + # Interpolate coordinates + struct_interpolated = np.array(interpolate_chromosomes(struct_coords, + lengths)) + + # Combine interpolated and original beads + struct_coords, struct_truths = combine_structs(struct_coords, + struct_interpolated, + lengths) + + else: + # Don't interpolate + struct_truths = None + + # Realign the structure + struct_realigned = realign_struct_for_plotting(struct_coords, + struct_coords) + + # Round the structure + struct_round = _round_struct_for_pdb(struct_realigned) + + # Write the structure to a PDB file + writePDB(struct_round, struct_truths, args.output_file) + + # Load the PDB file again + pdb_file = open(args.output_file, 'r').read().split('\n') + + # Fine which lines are nan + bad_lines = [] + for i in range(len(pdb_file)): + tokens = pdb_file[i].split() + true_line = [] + for j in range(len(tokens)): + if ((j >= 5) & (j <= 7)): + token = tokens[j] + if (str(token) != 'nan'): + true_line.append(token) + else: + true_line.append(tokens[j]) + if len(true_line) != len(tokens): + bad_lines.append(i) + + # Print out the not nan lines + with open(args.output_file, 'w') as pdb_output_file: + for i in range(len(pdb_file)): + if i not in bad_lines: + print(pdb_file[i], file=pdb_output_file) + + +if __name__ == "__main__": + main()