forked from tokyo/tokyo
-
Notifications
You must be signed in to change notification settings - Fork 1
/
tokyo.pxd
372 lines (269 loc) · 13.3 KB
/
tokyo.pxd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
cimport numpy as np
#
# External imports from Basic Linear Algebra Subroutines (BLAS)
#
cdef extern from "Python.h":
cdef void Py_INCREF(object)
cdef extern from "numpy/arrayobject.h":
cdef void import_array()
cdef object PyArray_ZEROS(int nd, np.npy_intp *dims, int typenum, int fortran)
cdef object PyArray_SimpleNew(int nd, np.npy_intp *dims, int typenum)
cdef object PyArray_EMPTY(int nd, np.npy_intp *dims, int typenum, int fortran)
int PyArray_ISCARRAY(np.ndarray instance) # I can't get this one to work?!?
int NPY_FLOAT # PyArray_FLOAT deprecated.
int NPY_DOUBLE # PyArray_DOUBLE deprecated.
cdef extern from "cblas.h":
enum CBLAS_ORDER: CblasRowMajor, CblasColMajor
enum CBLAS_TRANSPOSE: CblasNoTrans, CblasTrans, CblasConjTrans
enum CBLAS_UPLO: CblasUpper, CblasLower
enum CBLAS_DIAG: CblasNonUnit, CblasUnit
enum CBLAS_SIDE: CblasLeft, CblasRight
###########################################################################
# BLAS level 1 routines
###########################################################################
# Swap vectors: x <-> y
void lib_sswap "cblas_sswap"(int M, float *x, int dx, float *y, int dy)
void lib_dswap "cblas_dswap"(int M, double *x, int dx, double *y, int dy)
# Scale a vector: x <- alpha*x
void lib_sscal "cblas_sscal"(int N, float alpha, float *x, int dx)
void lib_dscal "cblas_dscal"(int N, double alpha, double *x, int dx)
# Copy a vector: y <- x
void lib_scopy "cblas_scopy"(int N, float *x, int dx, float *y, int dy)
void lib_dcopy "cblas_dcopy"(int N, double *x, int dx, double *y, int dy)
# Combined multiply/add: y <- alpha*x + y
void lib_saxpy "cblas_saxpy"(int N, float alpha, float *x, int dx,
float *y, int dy)
void lib_daxpy "cblas_daxpy"(int N, double alpha, double *x, int dx,
double *y, int dy)
# Dot product: x'y
float lib_sdot "cblas_sdot"(int N, float *x, int dx, float *y, int dy)
double lib_ddot "cblas_ddot"(int N, double *x, int dx, double *y, int dy)
# Euclidian (2-)norm: ||x||_2
float lib_snrm2 "cblas_snrm2"(int N, float *x, int dx)
double lib_dnrm2 "cblas_dnrm2"(int N, double *x, int dx)
# One norm: ||x||_1 = sum |xi|
float lib_sasum "cblas_sasum"(int N, float *x, int dx)
double lib_dasum "cblas_dasum"(int N, double *x, int dx)
# Argmax: i = arg max(|xj|)
int lib_isamax "cblas_isamax"(int N, float *x, int dx)
int lib_idamax "cblas_idamax"(int N, double *x, int dx)
# Generate a plane rotation.
void lib_srotg "cblas_srotg"(float *a, float *b, float *c, float *s)
void lib_drotg "cblas_drotg"(double *a, double *b, double *c, double *s)
# Generate a modified plane rotation.
void lib_srotmg "cblas_srotmg"(float *d1, float *d2, float *b1,
float b2, float *P)
void lib_drotmg "cblas_drotmg"(double *d1, double *d2, double *b1,
double b2, double *P)
# Apply a plane rotation.
void lib_srot "cblas_srot"(int N, float *x, int dx,
float *y, int dy,
float c, float s)
void lib_drot "cblas_drot"(int N, double *x, int dx,
double *y, int dy,
double c, double s)
# Apply a modified plane rotation.
void lib_srotm "cblas_srotm"(int N, float *x, int dx,
float *y, int dy,
float *P)
void lib_drotm "cblas_drotm"(int N, double *x, int dx,
double *y, int dy,
double *P)
###########################################################################
# BLAS level 2 routines
###########################################################################
# Combined multiply/add: y <- alpha*Ax + beta*y or y <- alpha*A'x + beta*y
void lib_sgemv "cblas_sgemv"(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA,
int M, int N, float alpha, float *A, int lda,
float *x, int dx,
float beta, float *y, int dy)
void lib_dgemv "cblas_dgemv"(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA,
int M, int N, double alpha, double *A, int lda,
double *x, int dx,
double beta, double *y, int dy)
# Rank-1 update: A <- alpha * x*y' + A
void lib_sger "cblas_sger"(CBLAS_ORDER Order, int M, int N, float alpha,
float *x, int dx, float *y, int dy,
float *A, int lda)
void lib_dger "cblas_dger"(CBLAS_ORDER Order, int M, int N, double alpha,
double *x, int dx, double *y, int dy,
double *A, int lda)
###########################################################################
# BLAS level 3 routines
###########################################################################
void lib_sgemm "cblas_sgemm"(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA,
CBLAS_TRANSPOSE TransB, int M, int N, int K,
float alpha, float *A, int lda, float *B, int ldb,
float beta, float *C, int ldc)
void lib_dgemm "cblas_dgemm"(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA,
CBLAS_TRANSPOSE TransB, int M, int N, int K,
double alpha, double *A, int lda, double *B, int ldb,
double beta, double *C, int ldc)
#####################################
#
# BLAS LEVEL 1 (vector operations)
#
#####################################
# vector swap: x <-> y
cdef void sswap_(int M, float *x, int dx, float *y, int dy)
cdef void sswap(np.ndarray x, np.ndarray y)
cdef void dswap_(int M, double *x, int dx, double *y, int dy)
cdef void dswap(np.ndarray x, np.ndarray y)
# scalar vector multiply: x *= alpha
cdef void sscal_(int N, float alpha, float *x, int dx)
cdef void sscal(float alpha, np.ndarray x)
cdef void dscal_(int N, double alpha, double *x, int dx)
cdef void dscal(double alpha, np.ndarray x)
# vector copy: y <- x
cdef void scopy_(int N, float *x, int dx, float *y, int dy)
cdef void scopy(np.ndarray x, np.ndarray y)
cdef void dcopy_(int N, double *x, int dx, double *y, int dy)
cdef void dcopy(np.ndarray x, np.ndarray y)
# vector addition: y += alpha*x
cdef void saxpy_(int N, float alpha, float *x, int dx, float *y, int dy)
cdef void saxpy(float alpha, np.ndarray x, np.ndarray y)
cdef void daxpy_(int N, double alpha, double *x, int dx, double *y, int dy)
cdef void daxpy(double alpha, np.ndarray x, np.ndarray y)
# vector dot product: x.T y
cdef float sdot_(int N, float *x, int dx, float *y, int dy)
cdef float sdot(np.ndarray x, np.ndarray y)
cdef double ddot_(int N, double *x, int dx, double *y, int dy)
cdef double ddot(np.ndarray x, np.ndarray y)
# Euclidean norm: ||x||_2
cdef float snrm2_(int N, float *x, int dx)
cdef float snrm2(np.ndarray)
cdef double dnrm2_(int N, double *x, int dx)
cdef double dnrm2(np.ndarray)
# sum of absolute values: ||x||_1
cdef float sasum_(int N, float *x, int dx)
cdef float sasum(np.ndarray x)
cdef double dasum_(int N, double *x, int dx)
cdef double dasum(np.ndarray x)
# index of maximum absolute value element
cdef int isamax_(int N, float *x, int dx)
cdef int isamax(np.ndarray x)
cdef int idamax_(int N, double *x, int dx)
cdef int idamax(np.ndarray x)
# Generate a Givens plane rotation: [a,b,c,s] <- rotg(a,b).
cdef np.ndarray srotg_(float a, float b)
cdef np.ndarray srotg(float a, float b)
cdef np.ndarray drotg_(double a, double b)
cdef np.ndarray drotg(double a, double b)
###########################################
#
# BLAS LEVEL 2 (matrix-vector operations)
#
###########################################
# single precision general matrix-vector multiply
cdef void sgemv_(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA, int M, int N,
float alpha, float *A, int lda, float *x, int dx,
float beta, float *y, int dy)
# y = alpha * A x + beta * y
# y = alpha * A.T x + beta * y
cdef void sgemv6(CBLAS_TRANSPOSE TransA, float alpha, np.ndarray A,
np.ndarray x, float beta, np.ndarray y)
# y = alpha * A x + beta * y
cdef void sgemv5(float alpha, np.ndarray A,
np.ndarray x, float beta, np.ndarray y)
# y = alpha * A x
cdef void sgemv3(np.ndarray A, np.ndarray x, np.ndarray y)
# return = alpha * A x
cdef np.ndarray sgemv(np.ndarray A, np.ndarray x)
# double precision general matrix-vector multiply
cdef void dgemv_(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA, int M, int N,
double alpha, double *A, int lda, double *x, int dx,
double beta, double *y, int dy)
# y = alpha * A x + beta * y
# y = alpha * A.T x + beta * y
cdef void dgemv6(CBLAS_TRANSPOSE TransA, double alpha, np.ndarray A,
np.ndarray x, double beta, np.ndarray y)
# y = alpha * A x + beta * y
cdef void dgemv5(double alpha, np.ndarray A,
np.ndarray x, double beta, np.ndarray y)
# y = alpha * A x
cdef void dgemv3(np.ndarray A, np.ndarray x, np.ndarray y)
# return = alpha * A x
cdef np.ndarray dgemv(np.ndarray A, np.ndarray x)
####
# single precision rank-1 opertion (aka outer product)
cdef void sger_(CBLAS_ORDER Order, int M, int N, float alpha, float *x, int dx,
float *y, int dy, float *A, int lda)
# A += alpha * x y.T (outer product)
cdef void sger4(float alpha, np.ndarray x, np.ndarray y, np.ndarray A)
# A += x y.T (outer product)
cdef void sger3(np.ndarray x, np.ndarray y, np.ndarray A)
# return = x y.T (outer product)
cdef np.ndarray sger(np.ndarray x, np.ndarray y)
# double precision rank-1 opertion (aka outer product)
cdef void dger_(CBLAS_ORDER Order, int M, int N, double alpha, double *x, int dx,
double *y, int dy, double *A, int lda)
# A += alpha * x y.T (outer product)
cdef void dger4(double alpha, np.ndarray x, np.ndarray y, np.ndarray A)
# A += x y.T (outer product)
cdef void dger3(np.ndarray x, np.ndarray y, np.ndarray A)
# return = x y.T (outer product)
cdef np.ndarray dger(np.ndarray x, np.ndarray y)
####################################################
#
# BLAS LEVEL 3 (matrix-matrix operations)
#
####################################################
# single precision general matrix-matrix multiply
cdef void sgemm_(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA, CBLAS_TRANSPOSE TransB,
int M, int N, int K, float alpha, float *A, int lda, float *B,
int ldb, float beta, float *C, int ldc)
# C = alpha * A B + beta * C
# C = alpha * A.T B + beta * C
# C = alpha * A B.T + beta * C
# C = alpha * A.T B.T + beta * C
cdef void sgemm7(CBLAS_TRANSPOSE TransA, CBLAS_TRANSPOSE TransB, float alpha,
np.ndarray A, np.ndarray B, float beta, np.ndarray C)
# C = alpha * A B + beta * C
cdef void sgemm5(float alpha, np.ndarray A, np.ndarray B, float beta, np.ndarray C)
# C += A B
cdef void sgemm3(np.ndarray A, np.ndarray B, np.ndarray C)
# return = A B
cdef np.ndarray sgemm(np.ndarray A, np.ndarray B)
# double precision general matrix-matrix multiply
cdef void dgemm_(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA, CBLAS_TRANSPOSE TransB,
int M, int N, int K, double alpha, double *A, int lda, double *B,
int ldb, double beta, double *C, int ldc)
# C = alpha * A B + beta * C
# C = alpha * A.T B + beta * C
# C = alpha * A B.T + beta * C
# C = alpha * A.T B.T + beta * C
cdef void dgemm7(CBLAS_TRANSPOSE TransA, CBLAS_TRANSPOSE TransB, double alpha,
np.ndarray A, np.ndarray B, double beta, np.ndarray C)
# C = alpha * A B + beta * C
cdef void dgemm5(double alpha, np.ndarray A, np.ndarray B, double beta, np.ndarray C)
# C += A B
cdef void dgemm3(np.ndarray A, np.ndarray B, np.ndarray C)
# return = A B
cdef np.ndarray dgemm(np.ndarray A, np.ndarray B)
######################################################################
#
# Utility functions I have put together that aren't in BLAS or LAPACK.
#
######################################################################
# create a new empty matrix
cdef np.ndarray smnewempty(int M, int N)
cdef np.ndarray dmnewempty(int M, int N)
# create a new empty vector
cdef np.ndarray svnewempty(int M)
cdef np.ndarray dvnewempty(int M)
# create a new zero matrix
cdef np.ndarray smnewzero(int M, int N)
cdef np.ndarray dmnewzero(int M, int N)
# create a new zero vector
cdef np.ndarray svnewzero(int M)
cdef np.ndarray dvnewzero(int M)
# set a matrix to all zeros
cdef void smsetzero(np.ndarray A)
cdef void dmsetzero(np.ndarray A)
# set a vector of to all zeros
cdef void svsetzero(np.ndarray x)
cdef void dvsetzero(np.ndarray x)
# matrix addition
# Y += alpha * X
cdef void smaxpy(float alpha, np.ndarray X, np.ndarray Y)
cdef void dmaxpy(double alpha, np.ndarray X, np.ndarray Y)