// fp16.cpp

#include "fp16.h"

#include <string.h>

// The half <-> float bit-pattern conversion routines below are copied from NumPy.

/*
 * Expand a half-precision bit pattern into the equivalent single-precision
 * bit pattern.
 *
 * h: raw 16-bit half value (1 sign bit, 5 exponent bits, 10 fraction bits).
 * Returns the raw bits of the float with the same value; every half value is
 * exactly representable, so no rounding takes place.
 * NOTE(review): the result layout assumes `unsigned` is at least 32 bits wide.
 */
static unsigned half2float(unsigned short h) {
  const unsigned sign = ((unsigned)h & 0x8000u) << 16;
  const unsigned exponent = (unsigned)h & 0x7c00u;
  unsigned fraction = (unsigned)h & 0x03ffu;
  unsigned shift = 0;

  /* Inf or NaN: all-ones float exponent plus a copy of the fraction bits. */
  if (exponent == 0x7c00u) {
    return sign + 0x7f800000u + (fraction << 13);
  }

  /* Normalized: rebias the exponent (127 - 15 = 112 = 0x1c000 >> 10) and
   * move exponent and fraction into place with a single shift. */
  if (exponent != 0x0000u) {
    return sign + ((((unsigned)h & 0x7fffu) + 0x1c000u) << 13);
  }

  /* Signed zero. */
  if (fraction == 0) {
    return sign;
  }

  /* Subnormal: shift left until the leading one reaches bit 10 (the implicit
   * bit position), counting the extra shifts needed after the first one. */
  fraction <<= 1;
  while ((fraction & 0x0400u) == 0) {
    fraction <<= 1;
    shift++;
  }

  /* Each extra shift lowers the float exponent by one; the implicit bit is
   * masked off before the fraction is positioned. */
  return sign + ((127u - 15u - shift) << 23) + ((fraction & 0x03ffu) << 13);
}

/*
 * Convert a single-precision bit pattern to the nearest half-precision bit
 * pattern.
 *
 * f: raw bits of a float (1 sign bit, 8 exponent bits, 23 fraction bits).
 * Returns the raw 16-bit half value:
 *   - NaN stays NaN (the top 10 fraction bits are kept; a payload that would
 *     truncate to zero is bumped so the result does not become inf),
 *   - inf and values too large for half become signed inf,
 *   - values too small for a half subnormal become signed zero,
 *   - everything else is rounded to the nearest half value.
 *
 * Compile-time switches (each treated as 0 when undefined):
 *   NPY_HALF_ROUND_TIES_TO_EVEN  - round exact ties to even; otherwise ties
 *                                  are rounded up in magnitude.
 *   NPY_HALF_GENERATE_OVERFLOW   - call npy_set_floatstatus_overflow() when
 *                                  a finite input becomes inf.
 *   NPY_HALF_GENERATE_UNDERFLOW  - call npy_set_floatstatus_underflow() when
 *                                  a subnormal/zero result is inexact.
 * NOTE(review): the npy_set_floatstatus_* hooks are not declared in this
 * file; they must be provided elsewhere if those switches are enabled.
 */
unsigned short float2half(unsigned f) {
  unsigned f_exp, f_sig;
  unsigned short h_sgn, h_exp, h_sig;

  h_sgn = (unsigned short)((f & 0x80000000u) >> 16);
  f_exp = (f & 0x7f800000u);

  /* Exponent overflow/NaN converts to signed inf/NaN */
  if (f_exp >= 0x47800000u) {
    if (f_exp == 0x7f800000u) {
      /* Inf or NaN */
      f_sig = (f & 0x007fffffu);
      if (f_sig != 0) {
        /* NaN - propagate the flag in the significand... */
        unsigned short ret = (unsigned short)(0x7c00u + (f_sig >> 13));
        /* ...but make sure it stays a NaN */
        if (ret == 0x7c00u) {
          ret++;
        }
        return (unsigned short)(h_sgn + ret);
      } else {
        /* signed inf */
        return (unsigned short)(h_sgn + 0x7c00u);
      }
    } else {
/* overflow to signed inf */
#if NPY_HALF_GENERATE_OVERFLOW
      npy_set_floatstatus_overflow();
#endif
      return (unsigned short)(h_sgn + 0x7c00u);
    }
  }

  /* Exponent underflow converts to a subnormal half or signed zero */
  if (f_exp <= 0x38000000u) {
    /*
     * Signed zeros, subnormal floats, and floats with small
     * exponents all convert to signed zero halfs.
     */
    if (f_exp < 0x33000000u) {
#if NPY_HALF_GENERATE_UNDERFLOW
      /* If f != 0, it underflowed to 0 */
      if ((f & 0x7fffffff) != 0) {
        npy_set_floatstatus_underflow();
      }
#endif
      return h_sgn;
    }
    /* Make the subnormal significand (restore the implicit leading one) */
    f_exp >>= 23;
    f_sig = (0x00800000u + (f & 0x007fffffu));
#if NPY_HALF_GENERATE_UNDERFLOW
    /* If it's not exactly represented, it underflowed */
    if ((f_sig & (((unsigned)1 << (126 - f_exp)) - 1)) != 0) {
      npy_set_floatstatus_underflow();
    }
#endif
    /*
     * A normal value would be shifted by 13 later on.  Subnormals need an
     * extra shift of 1 (biased exponent 112) up to 11 (biased exponent 102)
     * so the implicit bit lands inside the half significand.
     */
    f_sig >>= (113 - f_exp);
/* Handle rounding by adding 1 to the bit beyond half precision */
#if NPY_HALF_ROUND_TIES_TO_EVEN
    /*
     * If the last bit in the half significand is 0 (already even), and
     * the remaining bit pattern is 1000...0, then we do not add one
     * to the bit after the half significand.  The extra shift above can
     * drop up to 11 low bits, so those are checked in the original value:
     * if any of them is set the input is above the tie and must round up.
     * In all other cases, we add one.
     */
    if (((f_sig & 0x00003fffu) != 0x00001000u) || ((f & 0x000007ffu) != 0)) {
      f_sig += 0x00001000u;
    }
#else
    f_sig += 0x00001000u;
#endif
    h_sig = (unsigned short)(f_sig >> 13);
    /*
     * If the rounding causes a bit to spill into h_exp, it will
     * increment h_exp from zero to one and h_sig will be zero.
     * This is the correct result.
     */
    return (unsigned short)(h_sgn + h_sig);
  }

  /* Regular case with no overflow or underflow */
  h_exp = (unsigned short)((f_exp - 0x38000000u) >> 13);
  /* Handle rounding by adding 1 to the bit beyond half precision */
  f_sig = (f & 0x007fffffu);
#if NPY_HALF_ROUND_TIES_TO_EVEN
  /*
   * If the last bit in the half significand is 0 (already even), and
   * the remaining bit pattern is 1000...0, then we do not add one
   * to the bit after the half significand.  In all other cases, we do.
   * No bits were discarded before this point, so the low 14 bits are
   * sufficient to recognise an exact tie here.
   */
  if ((f_sig & 0x00003fffu) != 0x00001000u) {
    f_sig += 0x00001000u;
  }
#else
  f_sig += 0x00001000u;
#endif
  h_sig = (unsigned short)(f_sig >> 13);
/*
 * If the rounding causes a bit to spill into h_exp, it will
 * increment h_exp by one and h_sig will be zero.  This is the
 * correct result.  h_exp may increment to 15, at greatest, in
 * which case the result overflows to a signed inf.
 */
#if NPY_HALF_GENERATE_OVERFLOW
  h_sig += h_exp;
  if (h_sig == 0x7c00u) {
    npy_set_floatstatus_overflow();
  }
  return (unsigned short)(h_sgn + h_sig);
#else
  return (unsigned short)(h_sgn + h_exp + h_sig);
#endif
}

/*
 * Convert an array of floats to packed half-precision values.
 *
 * dst:   byte buffer that receives nelem half values, sizeof(unsigned short)
 *        bytes each, in host byte order.  No alignment is required.
 * src:   nelem input floats (read only).
 * nelem: number of elements to convert.
 *
 * Bits are moved with memcpy instead of pointer casts so that the float
 * storage is never read through an `unsigned` lvalue (strict aliasing) and
 * the byte buffer is never written through a possibly misaligned
 * `unsigned short` pointer.
 * NOTE(review): assumes sizeof(unsigned) == sizeof(float), as the original
 * pointer-cast version did.
 */
void floattofp16(unsigned char *dst, float *src, unsigned nelem) {
  unsigned i;

  for (i = 0; i < nelem; i++) {
    unsigned bits;
    unsigned short half;

    memcpy(&bits, &src[i], sizeof(bits));
    half = float2half(bits);
    memcpy(dst + i * sizeof(half), &half, sizeof(half));
  }
}

/*
 * Convert an array of packed half-precision values to floats.
 *
 * dst:   receives nelem floats.
 * src:   byte buffer holding nelem half values, sizeof(unsigned short) bytes
 *        each, in host byte order (read only).  No alignment is required.
 * nelem: number of elements to convert.
 *
 * Bits are moved with memcpy instead of pointer casts so that the byte
 * buffer is never read through a possibly misaligned `unsigned short`
 * pointer and the float storage is never written through an `unsigned`
 * lvalue (strict aliasing).
 * NOTE(review): assumes sizeof(unsigned) == sizeof(float), as the original
 * pointer-cast version did.
 */
void fp16tofloat(float *dst, unsigned char *src, unsigned nelem) {
  unsigned i;

  for (i = 0; i < nelem; i++) {
    unsigned short half;
    unsigned bits;

    memcpy(&half, src + i * sizeof(half), sizeof(half));
    bits = half2float(half);
    memcpy(&dst[i], &bits, sizeof(bits));
  }
}