-
Notifications
You must be signed in to change notification settings - Fork 2
/
sequence_alignment.py
122 lines (93 loc) · 3.36 KB
/
sequence_alignment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import sys
MAX_INT = sys.maxsize
GAP = 2
MISMATCH = 3
GAP_CHARACTER = '_'
def build_solution(sequence_a, sequence_b):
rows = len(sequence_a) + 1
columns = len(sequence_b) + 1 # [0..4] se o tamanho da palavra for 4 [0..3]
matrix = [[GAP * row] + ([0] * (columns - 1)) for row in range(rows)]
matrix[0] = [GAP * column for column in range(columns)]
for row in range(1, rows):
for column in range(1, columns):
# Verificação se é match ou mismatch
if sequence_b[column - 1] == sequence_a[row - 1]:
mismatch = 0
else:
mismatch = MISMATCH
# Verificar o menor dentre os adjacentes
results = [matrix[row - 1][column - 1] + mismatch, matrix[row - 1][column] + GAP, matrix[row][column - 1] + GAP]
matrix[row][column] = min(results)
penalty = matrix[rows - 1][columns - 1]
return matrix, penalty
def find_solution(matrix, sequence_a, sequence_b):
# Initial position
row = len(sequence_a)
column = len(sequence_b)
solution_a = ''
solution_b = ''
while column != 0 or row != 0:
adj = []
if column > 0 and row > 0:
adj = [matrix[row - 1][column - 1], matrix[row - 1][column], matrix[row][column - 1]]
else:
adj.append(MAX_INT)
if row == 0:
adj.append(MAX_INT)
else:
adj.append(matrix[row - 1][column])
if column == 0:
adj.append(MAX_INT)
else:
adj.append(matrix[row][column - 1])
index = adj.index(min(adj))
if index == 0:
solution_a += sequence_a[row - 1]
solution_b += sequence_b[column - 1]
row -= 1
column -= 1
elif index == 1:
solution_a += sequence_a[row - 1]
solution_b += GAP_CHARACTER
row -= 1
elif index == 2:
solution_a += GAP_CHARACTER
solution_b += sequence_b[column - 1]
column -= 1
return solution_a[::-1], solution_b[::-1]
def print_solution(matrix, sequence_a, sequence_b):
print(" " + GAP_CHARACTER, end=' ')
result = ' ' + GAP_CHARACTER + ' '
for i in range(len(sequence_b)):
print(sequence_b[i], end=' ')
result += str(sequence_b[i]) + ' '
print()
result += "\n"
for i in range(0, len(matrix)):
if i == 0:
print(GAP_CHARACTER, end=' ')
result += GAP_CHARACTER + ' '
else:
print(sequence_a[i - 1], end=' ')
result += str(sequence_a[i - 1]) + ' '
for j in range(0, len(matrix[0])):
print(matrix[i][j], end=' ')
result += str(matrix[i][j]) + ' '
print()
result += "\n"
return result
if __name__ == '__main__':
line_sequence = str(input("Line Sequence: "))
column_sequence = str(input("Column Sequence: "))
print(line_sequence)
print(len(line_sequence))
print(column_sequence)
print(len(column_sequence))
matrix, penalty = build_solution(line_sequence, column_sequence)
print_solution(matrix, line_sequence, column_sequence)
for row in matrix:
print(row)
solution_a, solution_b = find_solution(matrix, line_sequence, column_sequence)
print(penalty)
print(solution_a)
print(solution_b)