-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfp16.cpp
164 lines (152 loc) · 4.8 KB
/
fp16.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#include "fp16.h"
// Copied from Numpy
static unsigned half2float(unsigned short h) {
unsigned short h_exp, h_sig;
unsigned f_sgn, f_exp, f_sig;
h_exp = (h & 0x7c00u);
f_sgn = ((unsigned)h & 0x8000u) << 16;
switch (h_exp) {
case 0x0000u: /* 0 or subnormal */
h_sig = (h & 0x03ffu);
/* Signed zero */
if (h_sig == 0) {
return f_sgn;
}
/* Subnormal */
h_sig <<= 1;
while ((h_sig & 0x0400u) == 0) {
h_sig <<= 1;
h_exp++;
}
f_exp = ((unsigned)(127 - 15 - h_exp)) << 23;
f_sig = ((unsigned)(h_sig & 0x03ffu)) << 13;
return f_sgn + f_exp + f_sig;
case 0x7c00u: /* inf or NaN */
/* All-ones exponent and a copy of the significand */
return f_sgn + 0x7f800000u + (((unsigned)(h & 0x03ffu)) << 13);
default: /* normalized */
/* Just need to adjust the exponent and shift */
return f_sgn + (((unsigned)(h & 0x7fffu) + 0x1c000u) << 13);
}
}
unsigned short float2half(unsigned f) {
unsigned f_exp, f_sig;
unsigned short h_sgn, h_exp, h_sig;
h_sgn = (unsigned short)((f & 0x80000000u) >> 16);
f_exp = (f & 0x7f800000u);
/* Exponent overflow/NaN converts to signed inf/NaN */
if (f_exp >= 0x47800000u) {
if (f_exp == 0x7f800000u) {
/* Inf or NaN */
f_sig = (f & 0x007fffffu);
if (f_sig != 0) {
/* NaN - propagate the flag in the significand... */
unsigned short ret = (unsigned short)(0x7c00u + (f_sig >> 13));
/* ...but make sure it stays a NaN */
if (ret == 0x7c00u) {
ret++;
}
return h_sgn + ret;
} else {
/* signed inf */
return (unsigned short)(h_sgn + 0x7c00u);
}
} else {
/* overflow to signed inf */
#if NPY_HALF_GENERATE_OVERFLOW
npy_set_floatstatus_overflow();
#endif
return (unsigned short)(h_sgn + 0x7c00u);
}
}
/* Exponent underflow converts to a subnormal half or signed zero */
if (f_exp <= 0x38000000u) {
/*
* Signed zeros, subnormal floats, and floats with small
* exponents all convert to signed zero halfs.
*/
if (f_exp < 0x33000000u) {
#if NPY_HALF_GENERATE_UNDERFLOW
/* If f != 0, it underflowed to 0 */
if ((f & 0x7fffffff) != 0) {
npy_set_floatstatus_underflow();
}
#endif
return h_sgn;
}
/* Make the subnormal significand */
f_exp >>= 23;
f_sig = (0x00800000u + (f & 0x007fffffu));
#if NPY_HALF_GENERATE_UNDERFLOW
/* If it's not exactly represented, it underflowed */
if ((f_sig & (((unsigned)1 << (126 - f_exp)) - 1)) != 0) {
npy_set_floatstatus_underflow();
}
#endif
f_sig >>= (113 - f_exp);
/* Handle rounding by adding 1 to the bit beyond half precision */
#if NPY_HALF_ROUND_TIES_TO_EVEN
/*
* If the last bit in the half significand is 0 (already even), and
* the remaining bit pattern is 1000...0, then we do not add one
* to the bit after the half significand. In all other cases, we do.
*/
if ((f_sig & 0x00003fffu) != 0x00001000u) {
f_sig += 0x00001000u;
}
#else
f_sig += 0x00001000u;
#endif
h_sig = (unsigned short)(f_sig >> 13);
/*
* If the rounding causes a bit to spill into h_exp, it will
* increment h_exp from zero to one and h_sig will be zero.
* This is the correct result.
*/
return (unsigned short)(h_sgn + h_sig);
}
/* Regular case with no overflow or underflow */
h_exp = (unsigned short)((f_exp - 0x38000000u) >> 13);
/* Handle rounding by adding 1 to the bit beyond half precision */
f_sig = (f & 0x007fffffu);
#if NPY_HALF_ROUND_TIES_TO_EVEN
/*
* If the last bit in the half significand is 0 (already even), and
* the remaining bit pattern is 1000...0, then we do not add one
* to the bit after the half significand. In all other cases, we do.
*/
if ((f_sig & 0x00003fffu) != 0x00001000u) {
f_sig += 0x00001000u;
}
#else
f_sig += 0x00001000u;
#endif
h_sig = (unsigned short)(f_sig >> 13);
/*
* If the rounding causes a bit to spill into h_exp, it will
* increment h_exp by one and h_sig will be zero. This is the
* correct result. h_exp may increment to 15, at greatest, in
* which case the result overflows to a signed inf.
*/
#if NPY_HALF_GENERATE_OVERFLOW
h_sig += h_exp;
if (h_sig == 0x7c00u) {
npy_set_floatstatus_overflow();
}
return h_sgn + h_sig;
#else
return h_sgn + h_exp + h_sig;
#endif
}
void floattofp16(unsigned char *dst, float *src, unsigned nelem) {
unsigned i;
unsigned short *_dst = (unsigned short *)dst;
unsigned *_src = (unsigned *)src;
for (i = 0; i < nelem; i++) _dst[i] = float2half(_src[i]);
}
void fp16tofloat(float *dst, unsigned char *src, unsigned nelem) {
unsigned i;
unsigned *_dst = (unsigned *)dst;
unsigned short *_src = (unsigned short *)src;
for (i = 0; i < nelem; i++) _dst[i] = half2float(_src[i]);
}