-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtridi_wktn.f90
181 lines (154 loc) · 4.53 KB
/
tridi_wktn.f90
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
! seeing how easy it is to shove all of the wakatani stuff
! into a single subroutine
program tridi_wktn
implicit none
include "mpif.h"
integer n, myid, numproc, ierr, stat(MPI_STATUS_SIZE)
real*8, dimension(:), allocatable :: a, b, c, d, x, bp, acorr, acorr2
integer indexspan, startindex, ii
parameter (n=2**24)
real*8 t0, t1, t2, avgtime
call MPI_Init(ierr)
call MPI_Comm_rank(MPI_COMM_WORLD, myid, ierr)
call MPI_Comm_size(MPI_COMM_WORLD, numproc, ierr)
indexspan = n/numproc
startindex = myid*indexspan+1
allocate(a(n))
allocate(b(n))
allocate(c(n))
allocate(d(n))
allocate(x(indexspan))
allocate(bp(n))
allocate(acorr(indexspan))
allocate(acorr2(indexspan))
! initialize things
a = 1d0
b = -2d0
c = 1d0
d = -5d-1
if (numproc .gt. 1) then
! we want to precompute bp
bp(1) = b(1)
do ii=2,n
bp(ii) = b(ii) - (a(ii)/bp(ii-1))*c(ii-1)
enddo
! we also want to pre-compute the correction coefs for the entire vector
acorr = 0d0
if (myid .ne. 0) then
acorr(1) = -a(startindex)/bp(startindex-1)
do ii=2,indexspan
acorr(ii) = (-a(startindex+ii-1)/bp(startindex+ii-2)) * acorr(ii-1)
enddo
endif
acorr2 = 0d0
if (myid .ne. numproc-1) then
acorr2(indexspan) = -c(startindex+indexspan-1)/bp(startindex+indexspan-1)
do ii=indexspan-1,1,-1
acorr2(ii) = (-c(startindex+ii-1)/bp(startindex+ii-1)) * acorr2(ii+1)
enddo
endif
endif
t0 = MPI_Wtime()
! for testing on 1 processor
if (numproc .eq. 1) then
call solve_tridiag(a,b,c,d,x,n)
else
call tridi(a,b,c,d,x,n,myid,numproc,bp,acorr,acorr2,indexspan,startindex)
endif
t1 = MPI_Wtime(ierr) - t0
! output
do ii=0,numproc-1
if (myid .eq. ii) then
! write(*,*), "proc", myid
! write(*,'(8f8.2)'), x
endif
call MPI_Barrier(MPI_COMM_WORLD, ierr)
enddo
call MPI_ALLREDUCE(t1, avgtime, 1, MPI_REAL8, MPI_SUM, MPI_COMM_WORLD, ierr)
if (myid .eq. 0) then
write(*,*) (avgtime/numproc)
endif
! too lazy to deallocate
call MPI_Finalize(ierr)
end program tridi_wktn
! the magic happens here
! acorr, acorr2, and bp can all be computed once by a subroutine on program start
subroutine tridi(a,b,c,d,x,n,myid,numproc,bp,acorr,acorr2,indexspan,startindex)
implicit none
include "mpif.h"
integer n, myid, numproc, ii, ierr, req
integer stat(MPI_STATUS_SIZE)
integer indexspan, startindex
real(8), dimension(n) :: a,b,c,d,bp ! apparently needs the ::
real(8), dimension(indexspan) :: vp, x, acorr, acorr2
real(8) :: m, corr, sendcorr
! Start First Pass
! Guess Phase
vp(1) = d(startindex)
do ii=2,indexspan
m = a(startindex+ii-1)/bp(startindex+ii-2)
vp(ii) = d(startindex+ii-1) - m*vp(ii-1) ! fuck this line of code.
enddo
! Propagation Phase
if (myid .ne. 0) then
call MPI_Recv(corr, 1, MPI_REAL8, myid-1, 0, MPI_COMM_WORLD, stat, ierr)
vp(indexspan) = vp(indexspan) + acorr(indexspan)*corr
endif
sendcorr = vp(indexspan) ! could have sworn this was different. whatevs.
if (myid .ne. numproc-1) then
call MPI_Isend(sendcorr, 1, MPI_REAL8, myid+1, 0, MPI_COMM_WORLD, req, ierr)
endif
! Correction Phase
do ii=1,indexspan-1
vp(ii) = vp(ii) + acorr(ii)*corr
enddo
if (myid .ne. numproc-1) then
call MPI_Wait(req, stat, ierr)
endif
! Start Second Phase
! Guess Phase
x(indexspan) = vp(indexspan)/bp(indexspan+startindex-1)
do ii=indexspan-1,1,-1
x(ii) = vp(ii)/bp(startindex+ii-1) - (c(startindex+ii-1)/bp(startindex+ii-1))*x(ii+1)
enddo
! Propagation Phase
if (myid .ne. numproc-1) then
call MPI_Recv(corr, 1, MPI_REAL8, myid+1, 0, MPI_COMM_WORLD, stat, ierr)
x(1) = x(1) + acorr2(1)*corr
endif
sendcorr = x(1) ! could have sworn this was different. whatevs.
if (myid .ne. 0) then
call MPI_Isend(sendcorr, 1, MPI_REAL8, myid-1, 0, MPI_COMM_WORLD, req, ierr)
endif
! Correction Phase
do ii=2,indexspan
x(ii) = x(ii) + acorr2(ii)*corr
enddo
if (myid .ne. 0) then
call MPI_Wait(req, stat, ierr)
endif
end subroutine tridi
! for reference
subroutine solve_tridiag(a,b,c,v,x,n)
implicit none
integer,intent(in) :: n
real(8),dimension(n),intent(in) :: a,b,c,v
real(8),dimension(n),intent(out) :: x
real(8),dimension(n) :: bp,vp
real(8) :: m
integer i
! Make copies of the b and v variables so that they are unaltered by this sub
bp(1) = b(1)
vp(1) = v(1)
!The first pass (setting coefficients):
firstpass: do i = 2,n
m = a(i)/bp(i-1)
bp(i) = b(i) - m*c(i-1)
vp(i) = v(i) - m*vp(i-1)
end do firstpass
x(n) = vp(n)/bp(n)
!The second pass (back-substition)
backsub:do i = n-1, 1, -1
x(i) = (vp(i) - c(i)*x(i+1))/bp(i)
end do backsub
end subroutine solve_tridiag