From 18e10afc5c6318d4bb0ce331fde0013a0efcbdd1 Mon Sep 17 00:00:00 2001
From: Christopher Chang <chrchang523@gmail.com>
Date: Sat, 3 Feb 2024 21:04:16 -0800
Subject: [PATCH] 10+ ALT allele --vcf-half-call missing bugfix, --hardy/--hwe
 log-pvalues

---
 2.0/include/plink2_base.h   |    2 +-
 2.0/include/plink2_stats.cc | 1419 +++++++++++++++++++++++++++++++----
 2.0/include/plink2_stats.h  |   29 +-
 2.0/plink2.cc               |   58 +-
 2.0/plink2_filter.cc        |   39 +-
 2.0/plink2_filter.h         |    2 +-
 2.0/plink2_help.cc          |    5 +-
 2.0/plink2_import.cc        |   10 +
 2.0/plink2_ld.cc            |   19 +-
 2.0/plink2_misc.cc          |  109 ++-
 2.0/plink2_misc.h           |   35 +-
 11 files changed, 1462 insertions(+), 265 deletions(-)

diff --git a/2.0/include/plink2_base.h b/2.0/include/plink2_base.h
index 0d3a12ee..1950ef12 100644
--- a/2.0/include/plink2_base.h
+++ b/2.0/include/plink2_base.h
@@ -106,7 +106,7 @@
 // 10000 * major + 100 * minor + patch
 // Exception to CONSTI32, since we want the preprocessor to have access
 // to this value.  Named with all caps as a consequence.
-#define PLINK2_BASE_VERNUM 812
+#define PLINK2_BASE_VERNUM 813
 
 
 #define _FILE_OFFSET_BITS 64
diff --git a/2.0/include/plink2_stats.cc b/2.0/include/plink2_stats.cc
index a2acbf6c..99559063 100644
--- a/2.0/include/plink2_stats.cc
+++ b/2.0/include/plink2_stats.cc
@@ -22,8 +22,8 @@ namespace plink2 {
 
 // Thread-unsafe portions of plink_stats.c have been replaced, mostly by code
 // derived from boost/math/special_functions/gamma.hpp and
-// boost/math/special_functions/detail/igamma_inverse.hpp in Boost 1.60
-// (Maddock et al.).  The derived portions are subject to the following
+// boost/math/special_functions/detail/igamma_inverse.hpp in Boost 1.60 and
+// 1.84 (Maddock et al.).  The derived portions are subject to the following
 // license:
 //
 // *****
@@ -345,78 +345,122 @@ double finite_half_gamma_q2(uint32_t a_minus_half, double xx, double* p_derivati
   return ee;
 }
 
-static const double kLanczosSumNumer[6] = {8706.3495925490091, 8523.650341121874, 3338.0292194764235, 653.64249942940087, 63.999518449381870, 2.5066282746310063};
-static const double kLanczosSumDenom[6] = {0, 24, 50, 35, 10, 1};
-static const double kLanczosSumExpgNumer[6] = {32.812445410297834, 32.123889414443320, 12.580347294552161, 2.4634444783532414, 0.2412010548258800, 0.0094469677045392};
+static const double kLanczosFloatSumNumer[6] = {8706.3495925490091, 8523.650341121874, 3338.0292194764235, 653.64249942940087, 63.999518449381870, 2.5066282746310063};
+static const double kLanczosFloatSumDenom[6] = {0, 24, 50, 35, 10, 1};
+static const double kLanczosFloatSumExpgNumer[6] = {32.812445410297834, 32.123889414443320, 12.580347294552161, 2.4634444783532414, 0.2412010548258800, 0.0094469677045392};
 
 // this depends on the polynomial coefficients above
-static const double kLanczosG = 5.581;
+static const double kLanczosFloatG = 5.581;
 
-double lanczos_sum(double zz) {
+double lanczos_sum_f(double zz) {
   double s1;
   double s2;
   if (zz <= 1) {
-    s1 = kLanczosSumNumer[5];
-    s2 = kLanczosSumDenom[5];
+    s1 = kLanczosFloatSumNumer[5];
+    s2 = kLanczosFloatSumDenom[5];
     for (int32_t ii = 4; ii >= 0; --ii) {
       s1 *= zz;
       s2 *= zz;
-      s1 += kLanczosSumNumer[S_CAST(uint32_t, ii)];
-      s2 += kLanczosSumDenom[S_CAST(uint32_t, ii)];
+      s1 += kLanczosFloatSumNumer[S_CAST(uint32_t, ii)];
+      s2 += kLanczosFloatSumDenom[S_CAST(uint32_t, ii)];
     }
   } else {
     zz = 1 / zz;
-    s1 = kLanczosSumNumer[0];
-    s2 = kLanczosSumDenom[0];
+    s1 = kLanczosFloatSumNumer[0];
+    s2 = kLanczosFloatSumDenom[0];
     for (uint32_t uii = 1; uii != 6; ++uii) {
       s1 *= zz;
       s2 *= zz;
-      s1 += kLanczosSumNumer[uii];
-      s2 += kLanczosSumDenom[uii];
+      s1 += kLanczosFloatSumNumer[uii];
+      s2 += kLanczosFloatSumDenom[uii];
     }
   }
   return s1 / s2;
 }
 
-double lanczos_sum_expg_scaled_imp(double zz, double* s2_ptr) {
+double lanczos_sum_f_expg_scaled_imp(double zz, double* s2_ptr) {
   double s1;
   double s2;
   if (zz <= 1) {
-    s1 = kLanczosSumExpgNumer[5];
-    s2 = kLanczosSumDenom[5];
+    s1 = kLanczosFloatSumExpgNumer[5];
+    s2 = kLanczosFloatSumDenom[5];
     for (int32_t ii = 4; ii >= 0; --ii) {
       s1 *= zz;
       s2 *= zz;
-      s1 += kLanczosSumExpgNumer[S_CAST(uint32_t, ii)];
-      s2 += kLanczosSumDenom[S_CAST(uint32_t, ii)];
+      s1 += kLanczosFloatSumExpgNumer[S_CAST(uint32_t, ii)];
+      s2 += kLanczosFloatSumDenom[S_CAST(uint32_t, ii)];
     }
   } else {
     zz = 1 / zz;
-    s1 = kLanczosSumExpgNumer[0];
-    s2 = kLanczosSumDenom[0];
+    s1 = kLanczosFloatSumExpgNumer[0];
+    s2 = kLanczosFloatSumDenom[0];
     for (uint32_t uii = 1; uii != 6; ++uii) {
       s1 *= zz;
       s2 *= zz;
-      s1 += kLanczosSumExpgNumer[uii];
-      s2 += kLanczosSumDenom[uii];
+      s1 += kLanczosFloatSumExpgNumer[uii];
+      s2 += kLanczosFloatSumDenom[uii];
     }
   }
   *s2_ptr = s2;
   return s1;
 }
 
-double lanczos_sum_expg_scaled(double zz) {
+double lanczos_sum_f_expg_scaled(double zz) {
   double s2;
-  double s1 = lanczos_sum_expg_scaled_imp(zz, &s2);
+  double s1 = lanczos_sum_f_expg_scaled_imp(zz, &s2);
   return s1 / s2;
 }
 
-double lanczos_sum_expg_scaled_recip(double zz) {
+double lanczos_sum_f_expg_scaled_recip(double zz) {
   double s2;
-  double s1 = lanczos_sum_expg_scaled_imp(zz, &s2);
+  double s1 = lanczos_sum_f_expg_scaled_imp(zz, &s2);
   return s2 / s1;
 }
 
+// We want better-than-float precision for our log-factorial function.
+static const double kLanczosDoubleSumDenom[13] = {0, 39916800, 120543840, 150917976, 105258076, 45995730, 13339535, 2637558, 357423, 32670, 1925, 66, 1};
+static const double kLanczosDoubleSumExpgNumer[13] = {56906521.913471564, 103794043.11634455, 86363131.288138591, 43338889.324676138, 14605578.087685068, 3481712.1549806459, 601859.61716810988, 75999.293040145426, 6955.9996025153761, 449.94455690631681, 19.519927882476175, 0.50984166556566762, 0.0060618423462489065};
+
+// this depends on the polynomial coefficients above
+static const double kLanczosDoubleG = 6.0246800407767296;
+
+double lanczos_sum_d_expg_scaled_imp(double zz, double* s2_ptr) {
+  double s1;
+  double s2;
+  // Only called by Lfact() for now, where zz is guaranteed to be > 1.
+  /*
+  if (zz <= 1) {
+    s1 = kLanczosDoubleSumExpgNumer[12];
+    s2 = kLanczosDoubleSumDenom[12];
+    for (int32_t ii = 11; ii >= 0; --ii) {
+      s1 *= zz;
+      s2 *= zz;
+      s1 += kLanczosDoubleSumExpgNumer[S_CAST(uint32_t, ii)];
+      s2 += kLanczosDoubleSumDenom[S_CAST(uint32_t, ii)];
+    }
+  } else {
+  */
+  zz = 1 / zz;
+  s1 = kLanczosDoubleSumExpgNumer[0];
+  s2 = kLanczosDoubleSumDenom[0];
+  for (uint32_t uii = 1; uii != 13; ++uii) {
+    s1 *= zz;
+    s2 *= zz;
+    s1 += kLanczosDoubleSumExpgNumer[uii];
+    s2 += kLanczosDoubleSumDenom[uii];
+  }
+  // }
+  *s2_ptr = s2;
+  return s1;
+}
+
+double lanczos_sum_d_expg_scaled(double zz) {
+  double s2;
+  double s1 = lanczos_sum_d_expg_scaled_imp(zz, &s2);
+  return s1 / s2;
+}
+
+
 double log1pmx(double xx) {
   // log(1+x) - x
   // assumes abs(xx) < 0.95
@@ -443,17 +487,18 @@ double log1pmx(double xx) {
 // compute (z^a)(e^{-z})/tgamma(a)
 double regularized_gamma_prefix(double aa, double zz) {
   // assumes a == 0.5 if a < 1.  assumes z > 0.
-  // we are fine with float-level precision, so lanczos_n=6, kLanczosG=5.581
+  // we are fine with float-level precision, so lanczos_n=6,
+  // kLanczosFloatG=5.581
   if (aa < 1) {
     return sqrt(zz) * exp(-zz) * (1.0 / kSqrtPi);
   }
-  const double agh = aa + kLanczosG - 0.5;
+  const double agh = aa + kLanczosFloatG - 0.5;
   const double agh_recip = 1.0 / agh;
-  const double dd = ((zz - aa) - (kLanczosG - 0.5)) * agh_recip;
+  const double dd = ((zz - aa) - (kLanczosFloatG - 0.5)) * agh_recip;
   double prefix;
   if ((fabs(dd * dd * aa) <= 100) && (aa > 150)) {
     // abs(dd) < sqrt(2/3) < 0.95
-    prefix = aa * log1pmx(dd) + zz * (0.5 - kLanczosG) * agh_recip;
+    prefix = aa * log1pmx(dd) + zz * (0.5 - kLanczosFloatG) * agh_recip;
     prefix = exp(prefix);
   } else {
     const double alz = aa * log(zz * agh_recip);
@@ -479,7 +524,7 @@ double regularized_gamma_prefix(double aa, double zz) {
       prefix = pow(zz * agh_recip, aa) * exp(amz);
     }
   }
-  prefix *= sqrt(agh * kRecipE) * lanczos_sum_expg_scaled_recip(aa);
+  prefix *= sqrt(agh * kRecipE) * lanczos_sum_f_expg_scaled_recip(aa);
   return prefix;
 }
 
@@ -731,21 +776,22 @@ static const double kLnSqrtPi = 0.5723649429247001;
 // compute -log((z^a)(e^{-z})/tgamma(a))
 double regularized_gamma_prefix_ln(double aa, double zz) {
   // assumes a == 0.5 if a < 1.  assumes z > 0.
-  // we are fine with float-level precision, so lanczos_n=6, kLanczosG=5.581
+  // we are fine with float-level precision, so lanczos_n=6,
+  // kLanczosFloatG=5.581
   if (aa < 1) {
     return -zz + 0.5 * log(zz) - kLnSqrtPi;
   }
-  const double agh = aa + kLanczosG - 0.5;
+  const double agh = aa + kLanczosFloatG - 0.5;
   const double agh_recip = 1.0 / agh;
-  const double dd = ((zz - aa) - (kLanczosG - 0.5)) * agh_recip;
+  const double dd = ((zz - aa) - (kLanczosFloatG - 0.5)) * agh_recip;
   double prefix_ln;
   if ((fabs(dd * dd * aa) <= 100) && (aa > 150)) {
     // abs(dd) < sqrt(2/3) < 0.95
-    prefix_ln = aa * log1pmx(dd) + zz * (0.5 - kLanczosG) * agh_recip;
+    prefix_ln = aa * log1pmx(dd) + zz * (0.5 - kLanczosFloatG) * agh_recip;
   } else {
     prefix_ln = (aa - zz) + aa * log(zz * agh_recip);
   }
-  const double scaled_recip = lanczos_sum_expg_scaled_recip(aa);
+  const double scaled_recip = lanczos_sum_f_expg_scaled_recip(aa);
   return prefix_ln + 0.5 * log(agh * kRecipE * scaled_recip * scaled_recip);
 }
 
@@ -1204,15 +1250,15 @@ double ibeta_series_ln(double aa, double bb, double xx, uint32_t inv) {
 
   const double cc = aa + bb;
 
-  const double agh = aa + kLanczosG - 0.5;
-  const double bgh = bb + kLanczosG - 0.5;
-  const double cgh = cc + kLanczosG - 0.5;
+  const double agh = aa + kLanczosFloatG - 0.5;
+  const double bgh = bb + kLanczosFloatG - 0.5;
+  const double cgh = cc + kLanczosFloatG - 0.5;
   double numer_a;
-  double denom_a = lanczos_sum_expg_scaled_imp(aa, &numer_a);
+  double denom_a = lanczos_sum_f_expg_scaled_imp(aa, &numer_a);
   double numer_b;
-  double denom_b = lanczos_sum_expg_scaled_imp(bb, &numer_b);
+  double denom_b = lanczos_sum_f_expg_scaled_imp(bb, &numer_b);
   double denom_c;
-  double numer_c = lanczos_sum_expg_scaled_imp(cc, &denom_c);
+  double numer_c = lanczos_sum_f_expg_scaled_imp(cc, &denom_c);
   double result = (numer_a * numer_b * numer_c) / (denom_a * denom_b * denom_c);
   double l1 = log(cgh / bgh) * (bb - 0.5);
   double l2 = log(xx * cgh / agh) * aa;
@@ -1265,10 +1311,10 @@ double tgamma_delta_ratio(double zz, double delta) {
   }
   assert(delta == 0.5);
   // Can skip z < epsilon and z + delta == z branches for now.
-  double zgh = zz + kLanczosG - 0.5;
+  double zgh = zz + kLanczosFloatG - 0.5;
   // Also skip fabs(delta) >= 10 branch for now.
   double result = exp((0.5 - zz) * log1p(delta / zgh));
-  result *= lanczos_sum(zz) / lanczos_sum(zz + delta);
+  result *= lanczos_sum_f(zz) / lanczos_sum_f(zz + delta);
   // exploit forced delta == 0.5
   result *= sqrt(kE / (zgh + delta));
   return result;
@@ -1361,15 +1407,15 @@ double ibeta_power_terms_ln(double aa, double bb, double xx, double yy) {
   // prefix always 1
   double cc = aa + bb;
 
-  const double agh = aa + kLanczosG - 0.5;
-  const double bgh = bb + kLanczosG - 0.5;
-  const double cgh = cc + kLanczosG - 0.5;
+  const double agh = aa + kLanczosFloatG - 0.5;
+  const double bgh = bb + kLanczosFloatG - 0.5;
+  const double cgh = cc + kLanczosFloatG - 0.5;
   double numer_a;
-  double denom_a = lanczos_sum_expg_scaled_imp(aa, &numer_a);
+  double denom_a = lanczos_sum_f_expg_scaled_imp(aa, &numer_a);
   double numer_b;
-  double denom_b = lanczos_sum_expg_scaled_imp(bb, &numer_b);
+  double denom_b = lanczos_sum_f_expg_scaled_imp(bb, &numer_b);
   double denom_c;
-  double numer_c = lanczos_sum_expg_scaled_imp(cc, &denom_c);
+  double numer_c = lanczos_sum_f_expg_scaled_imp(cc, &denom_c);
   double result = (numer_a * numer_b * numer_c) / (denom_a * denom_b * denom_c);
   result *= sqrt(agh * bgh * kRecipE / cgh);
   double result_ln = log(result);
@@ -1717,12 +1763,122 @@ double QuantileToZscore(double pp) {
     (((((kIvnB[0]*r+kIvnB[1])*r+kIvnB[2])*r+kIvnB[3])*r+kIvnB[4])*r+1);
 }
 
+static const double kLogFactorials[99] = {
+  0,
+  0,
+  0.69314718055994529,
+  1.791759469228055,
+  3.1780538303479458,
+  4.7874917427820458,
+  6.5792512120101012,
+  8.5251613610654129,
+  10.604602902745251,
+  12.801827480081471,
+  15.104412573075518,
+  17.502307845873887,
+  19.987214495661888,
+  22.552163853123428,
+  25.19122118273868,
+  27.89927138384089,
+  30.671860106080675,
+  33.505073450136891,
+  36.395445208033053,
+  39.339884187199495,
+  42.335616460753485,
+  45.380138898476915,
+  48.471181351835227,
+  51.606675567764377,
+  54.784729398112312,
+  58.003605222980525,
+  61.261701761002001,
+  64.557538627006323,
+  67.88974313718154,
+  71.257038967168015,
+  74.658236348830172,
+  78.092223553315307,
+  81.557959456115029,
+  85.05446701758153,
+  88.580827542197667,
+  92.136175603687093,
+  95.719694542143202,
+  99.330612454787428,
+  102.9681986145138,
+  106.63176026064346,
+  110.32063971475741,
+  114.03421178146171,
+  117.77188139974507,
+  121.53308151543862,
+  125.31727114935688,
+  129.12393363912722,
+  132.95257503561629,
+  136.80272263732635,
+  140.67392364823425,
+  144.56574394634487,
+  148.47776695177302,
+  152.40959258449737,
+  156.3608363030788,
+  160.3311282166309,
+  164.3201122631952,
+  168.32744544842768,
+  172.35279713916282,
+  176.39584840699732,
+  180.45629141754378,
+  184.53382886144945,
+  188.6281734236716,
+  192.73904728784493,
+  196.86618167289001,
+  201.00931639928149,
+  205.16819948264117,
+  209.34258675253682,
+  213.53224149456327,
+  217.73693411395425,
+  221.95644181913033,
+  226.19054832372763,
+  230.43904356577696,
+  234.70172344281826,
+  238.97838956183432,
+  243.26884900298268,
+  247.57291409618685,
+  251.89040220972319,
+  256.22113555000954,
+  260.56494097186322,
+  264.92164979855278,
+  269.29109765101975,
+  273.67312428569375,
+  278.06757344036617,
+  282.47429268763045,
+  286.89313329542699,
+  291.32395009427029,
+  295.7666013507606,
+  300.22094864701415,
+  304.68685676566872,
+  309.1641935801469,
+  313.65282994987905,
+  318.1526396202093,
+  322.66349912672621,
+  327.18528770377526,
+  331.71788719692847,
+  336.26118197919845,
+  340.81505887079902,
+  345.37940706226686,
+  349.95411804077025,
+  354.53908551944079
+};
+
+// port of Boost 1.84 implementation, double-precision
+double Lfact(double xx) {
+  if (xx < 99.0) {
+    const uint32_t uii = S_CAST(int32_t, xx);
+    return kLogFactorials[uii];
+  }
+  const double zz = xx + 1;
+  double result = (log(zz + kLanczosDoubleG - 0.5) - 1) * (zz - 0.5);
+  result += log(lanczos_sum_d_expg_scaled(zz));
+  return result;
+}
 
-// HweP() and HweXchrP() are now licensed as GPL 2+.
-// possible todo: variant which returns -ln(pval), at least up to long double
-// range (i.e. normally use faster notlong-double code, but centerp > DBL_MAX
-// launches the slower long double code path)
-double HweP(int32_t obs_hets, int32_t obs_hom1, int32_t obs_hom2, uint32_t midp) {
+// obs_hets + obs_hom1 + obs_hom2 assumed to be <2^31.
+double HweLnP(int32_t obs_hets, int32_t obs_hom1, int32_t obs_hom2, uint32_t midp) {
   // This function implements an exact SNP test of Hardy-Weinberg
   // Equilibrium as described in Wigginton, JE, Cutler, DJ, and
   // Abecasis, GR (2005) A Note on Exact Tests of Hardy-Weinberg
@@ -1743,10 +1899,12 @@ double HweP(int32_t obs_hets, int32_t obs_hom1, int32_t obs_hom2, uint32_t midp)
   //   sums.
   // - Support for the mid-p variant of this test.  See Graffelman J, Moreno V
   //   (2013) The mid p-value in exact tests for Hardy-Weinberg equilibrium.
+  // - Log-p-value return (added Jan 2024).  A p-value of 1e-400 may be worth
+  //   distinguishing from 1e-40000 in a biobank-scale dataset.
   //
-  // Note that the HweThresh() function below is a lot more efficient for
-  // testing against a p-value inclusion threshold.  HweP() should only be
-  // used if you need the actual p-value.
+  // Note that the HweThreshLn() function is a lot more efficient for testing
+  // against a p-value inclusion threshold.  HweLnP() should only be used if
+  // you need the actual p-value.
   intptr_t obs_homc;
   intptr_t obs_homr;
   if (obs_hom1 < obs_hom2) {
@@ -1756,126 +1914,349 @@ double HweP(int32_t obs_hets, int32_t obs_hom1, int32_t obs_hom2, uint32_t midp)
     obs_homc = obs_hom1;
     obs_homr = obs_hom2;
   }
-  const int64_t rare_copies = 2LL * obs_homr + obs_hets;
-  const int64_t genotypes2 = (obs_hets + obs_homc + obs_homr) * 2LL;
-  if (!genotypes2) {
+  const int64_t rare_ct = 2LL * obs_homr + obs_hets;
+  if (rare_ct < 2) {
     if (midp) {
-      return 0.5;
+      return -kLn2;
     }
-    return 1;
+    return 0;
   }
+  // MAF: rare_ct / allele_ct
+  // modal #hets:
+  //   sample_ct * 2 * MAF * (1 - MAF)
+  // = rare_ct * (1 - MAF)
+  const int64_t sample_ct = obs_hom1 + obs_hom2 + obs_hets;
+  const double rare_ctd = rare_ct;
+  const double sample_ctd = sample_ct;
+  const double allele_ctd = sample_ctd * 2;
+  const double maf = rare_ctd / allele_ctd;
+  const double modal_nhet = rare_ctd * (1 - maf);
+  double curr_hets = obs_hets;
+  double curr_homr = obs_homr;
+  double curr_homc = obs_homc;
+  const double c_minus_r = curr_homc - curr_homr;
   int32_t tie_ct = 1;
-  double curr_hets_t2 = obs_hets;
-  double curr_homr_t2 = obs_homr;
-  double curr_homc_t2 = obs_homc;
-  double tailp = (1 - kSmallEpsilon) * kExactTestBias;
-  double centerp = 0;
-  double lastp2 = tailp;
-  double lastp1 = tailp;
-
-  if (obs_hets * genotypes2 > rare_copies * (genotypes2 - rare_copies)) {
-    // tail 1 = upper
-    while (curr_hets_t2 > 1.5) {
-      // het_probs[curr_hets] = 1
-      // het_probs[curr_hets - 2] = het_probs[curr_hets] * curr_hets * (curr_hets - 1.0)
-      curr_homr_t2 += 1;
-      curr_homc_t2 += 1;
-      lastp2 *= (curr_hets_t2 * (curr_hets_t2 - 1)) / (4 * curr_homr_t2 * curr_homc_t2);
-      curr_hets_t2 -= 2;
-      if (lastp2 < kExactTestBias) {
-        tie_ct += (lastp2 > (1 - 2 * kSmallEpsilon) * kExactTestBias);
-        tailp += lastp2;
-        break;
-      }
-      centerp += lastp2;
-      // doesn't seem to make a difference, but seems best to minimize use of
-      // INFINITY
-      if (centerp > DBL_MAX) {
+  if (curr_hets > modal_nhet) {
+    const double het_delta = curr_hets - modal_nhet;
+    if ((!midp) && (het_delta < 2.0)) {
+      // Fast path for p=1.
+      if (curr_hets * (curr_hets - 1) <= (4 * (1 + kSmallEpsilon)) * (curr_homr + 1) * (curr_homc + 1)) {
         return 0;
       }
     }
-    if ((centerp == 0) && (!midp)) {
-      return 1;
+    // Tried using old algorithm on rare_ct < 64, but this didn't make a
+    // noticeable difference to --hardy execution time on 1000 Genomes data, so
+    // I'd rather not bloat this function further.
+
+    // If we're close enough to the center, we may be best off computing 1 -
+    // <sum of center probabilities>.  But this approach is vulnerable to
+    // catastrophic cancellation.  Doubles have 53 bits of precision, while
+    // kSmallEpsilon (which governs equality checks) is configured to 2^{-44},
+    // so it makes sense to insist that the final subtraction does not cost us
+    // more than ~9 bits.
+    // If the starting cell has probability > 2^{-10}, that is close enough to
+    // true; and I think this catches most of the cases where we actively want
+    // to use the subtract-center strategy?
+    //
+    // From e.g. the Wigginton paper, P(N_{AB}=n_{AB} | N, n_A) is
+    //
+    //      2^{n_{AB}} N! n_A! n_B!
+    //   -----------------------------
+    //   n_{AA}! n_{AB}! n_{BB}! (2N)!
+    const double common_lnprob_component = Lfact(sample_ctd) + Lfact(rare_ctd) + Lfact(allele_ctd - rare_ctd) - Lfact(allele_ctd);
+    const double starting_lnprob_other_component = curr_hets * kLn2 - Lfact(curr_hets) - Lfact(curr_homr) - Lfact(curr_homc);
+    const double starting_lnprob = common_lnprob_component + starting_lnprob_other_component;
+    double lastp = 1;
+    // Either obs_hets >= 2, or obs_homr == obs_hets == obs_homc == 1.  In the
+    // latter case, starting_lnprob will be well over -10 * kLn2, so the other
+    // branch can assume obs_hets >= 2.
+    if (starting_lnprob > -10 * kLn2) {
+      // This condition stops triggering once we have more than ~3 million
+      // samples, but I expect it to be a significant timesaver before that
+      // point.
+      double centerp = 0;
+      // rescale this at the end.
+      while (curr_hets > 1.5) {
+        curr_homr += 1;
+        curr_homc += 1;
+        lastp *= (curr_hets * (curr_hets - 1)) / (4 * curr_homr * curr_homc);
+        curr_hets -= 2;
+        if (lastp < 1 + kSmallEpsilon) {
+          tie_ct += (lastp > 1 - kSmallEpsilon);
+          break;
+        }
+        centerp += lastp;
+      }
+      if (midp) {
+        centerp += S_CAST(double, tie_ct) * 0.5;
+      }
+      const double starting_prob = exp(starting_lnprob);
+      centerp *= starting_prob;
+      return log(1 - centerp);
     }
-    while (curr_hets_t2 > 1.5) {
-      curr_homr_t2 += 1;
-      curr_homc_t2 += 1;
-      lastp2 *= (curr_hets_t2 * (curr_hets_t2 - 1)) / (4 * curr_homr_t2 * curr_homc_t2);
-      curr_hets_t2 -= 2;
+    double tailp = 1;
+    curr_hets += 2;
+    while (curr_homr > 0.5) {
+      lastp *= (4 * curr_homr * curr_homc) / (curr_hets * (curr_hets - 1));
       const double preaddp = tailp;
-      tailp += lastp2;
+      tailp += lastp;
       if (tailp <= preaddp) {
         break;
       }
+      curr_hets += 2;
+      curr_homr -= 1;
+      curr_homc -= 1;
+    }
+    // Now we want to jump near the other tail, without evaluating that many
+    // terms in between.
+    //
+    // Each full log-likelihood evaluation requires 3 Lfact() calls.  Lfact()
+    // tends to be ~2-3 times as expensive as log in my testing, and I've seen
+    // log be anywhere from ~3 to ~12 times as expensive as division, so we
+    // want to limit ourselves to 1-2 full evaluations most of the time.
+    //
+    // The current heuristic starts by reflecting (obs_homr + curr_homr) * 0.5
+    // across the mode, performing a full log-likelihood check at the nearest
+    // valid point.  Hopefully we find that we're in (starting_lnprob - 62 *
+    // kLn2, starting_lnprob], so we're at or near a cell that actually
+    // contributes to the tail-sum.  (This window is chosen to be wide enough
+    // to guarantee that at least one point falls inside when sample_ct <
+    // 2^31.)
+    //
+    // If not, we jump again, using Newton's method.
+    // If curr_homr is too low (i.e. current log-likelihood is too high), when
+    // we increase curr_homr by 1, the likelihood gets multiplied by
+    //   curr_hets * (curr_hets-1) / (4 * (curr_homr+1) * (curr_homc+1))
+    // i.e. we're adding the logarithm of this value to the log-likelihood.
+    // If curr_homr is too high, when we decrease curr_homr by 1, the
+    // likelihood gets multiplied by
+    //   4 * curr_homr * curr_homc / ((curr_hets+2) * (curr_hets+1))
+    // We use the log of the first expression as the Newton's method f'(x) when
+    // we're jumping to higher curr_homr, and the negative-log of the second
+    // expression when we're jumping to lower curr_homr.
+    // f''(x) is always negative, so we can aim for starting_lnprob instead of
+    // the middle of the interval.
+
+    // curr_hets moves twice as fast as curr_homr.  So if we add
+    //   0.5 * (curr_hets + S_CAST(double, obs_hets)) - modal_nhet
+    // to 0.5 * (curr_homr + S_CAST(double, obs_homr)), that reflects curr_homr
+    // across the mode.
+    const double max_homr = S_CAST(double, rare_ct / 2);
+    {
+      const double delta = 0.5 * (curr_hets + S_CAST(double, obs_hets)) - modal_nhet;
+      curr_homr = 0.5 * (curr_homr + S_CAST(double, obs_homr)) + delta;
+      // Round and clamp.  (er, does clamping ever come up in this direction?)
+      curr_homr = S_CAST(double, S_CAST(int32_t, curr_homr + 0.5));
+      if (curr_homr > max_homr) {
+        curr_homr = max_homr;
+      }
+    }
+    // starting_lnprob_other_component is guaranteed to be positive here.
+    // Lfact() should be accurate enough for us to use a smaller-than-usual
+    // epsilon for identifying ties here; this isn't enough to cancel out the
+    // effective increase in epsilon from working in log-space, but it helps.
+    const double max_diff = starting_lnprob_other_component * (kSmallEpsilon / 8);
+    while (1) {
+      curr_hets = rare_ctd - curr_homr * 2;
+      curr_homc = curr_homr + c_minus_r;
+      const double lnprob_other_component = curr_hets * kLn2 - Lfact(curr_hets) - Lfact(curr_homr) - Lfact(curr_homc);
+      const double lnprob_diff = lnprob_other_component - starting_lnprob_other_component;
+      if (lnprob_diff > max_diff) {
+        if (curr_homr >= max_homr) {
+          // All terms on this tail are larger than the starting term.  Exit.
+          // (is this possible?)
+          if (midp) {
+            tailp -= 0.5;
+          }
+          return starting_lnprob + log(tailp);
+        }
+        const double ll_deriv = log(curr_hets * (curr_hets - 1) / (4 * (curr_homr + 1) * (curr_homc + 1)));
+        // Round absolute value up, to guarantee that we make progress.
+        // (lnprob_diff is positive and ll_deriv is negative.)
+        // This may overshoot.  But the function is guaranteed to terminate
+        // because we never overshoot (and we do always make progress on each
+        // step) once we're on the other side.
+        curr_homr += 1 - S_CAST(int64_t, (1 - kSmallEpsilon) * lnprob_diff / ll_deriv);
+        if (curr_homr > max_homr) {
+          curr_homr = max_homr;
+        }
+      } else if (lnprob_diff > -62 * kLn2) {
+        lastp = exp(lnprob_diff);
+        break;
+      } else {
+        const double ll_deriv = log((curr_hets + 2) * (curr_hets + 1) / (4 * curr_homr * curr_homc));
+        // Round down, to guarantee we don't overshoot.
+        // We're guaranteed to make progress, since lnprob_diff >= 62 * log(2)
+        // and sample_ct < 2^31.
+        curr_homr -= S_CAST(int64_t, lnprob_diff / ll_deriv);
+        if (curr_homr < 0) {
+          curr_homr = 0;
+        }
+      }
     }
-    double curr_hets_t1 = obs_hets + 2;
-    double curr_homr_t1 = obs_homr;
-    double curr_homc_t1 = obs_homc;
-    while (curr_homr_t1 > 0.5) {
-      // het_probs[curr_hets + 2] = het_probs[curr_hets] * 4 * curr_homr * curr_homc / ((curr_hets + 2) * (curr_hets + 1))
-      lastp1 *= (4 * curr_homr_t1 * curr_homc_t1) / (curr_hets_t1 * (curr_hets_t1 - 1));
+    // Sum toward center, until lastp >= 1.
+    double lastp_tail = lastp;
+    if (lastp < 1 - kSmallEpsilon) {
+      double curr_homr_center = curr_homr;
+      double curr_homc_center = curr_homc;
+      double curr_hets_center = curr_hets;
+      while (1) {
+        tailp += lastp;
+        curr_hets_center += 2;
+        lastp *= (4 * curr_homr_center * curr_homc_center) / (curr_hets_center * (curr_hets_center - 1));
+        if (lastp >= 1 - kSmallEpsilon) {
+          break;
+        }
+        curr_homr_center -= 1;
+        curr_homc_center -= 1;
+      }
+    }
+    if (lastp < 1 + kSmallEpsilon) {
+      tailp += lastp;
+      ++tie_ct;
+    }
+    // Sum away from center, until sums stop changing.
+    while (1) {
+      curr_homr += 1;
+      curr_homc += 1;
+      lastp_tail *= (curr_hets * (curr_hets - 1)) / (4 * curr_homr * curr_homc);
       const double preaddp = tailp;
-      tailp += lastp1;
+      tailp += lastp_tail;
       if (tailp <= preaddp) {
         break;
       }
-      curr_hets_t1 += 2;
-      curr_homr_t1 -= 1;
-      curr_homc_t1 -= 1;
+      curr_hets -= 2;
     }
-  } else {
-    // tail 1 = lower
-    while (curr_homr_t2 > 0.5) {
-      curr_hets_t2 += 2;
-      lastp2 *= (4 * curr_homr_t2 * curr_homc_t2) / (curr_hets_t2 * (curr_hets_t2 - 1));
-      curr_homr_t2 -= 1;
-      curr_homc_t2 -= 1;
-      if (lastp2 < kExactTestBias) {
-        tie_ct += (lastp2 > (1 - 2 * kSmallEpsilon) * kExactTestBias);
-        tailp += lastp2;
+    if (midp) {
+      tailp -= S_CAST(double, tie_ct) * 0.5;
+    }
+    return starting_lnprob + log(tailp);
+  }
+  // Same as above, just with directions flipped.
+  const double het_delta = modal_nhet - curr_hets;
+  if ((!midp) && (het_delta < 2.0)) {
+    // Fast path for p=1.
+    if ((4 * (1 - kSmallEpsilon)) * curr_homr * curr_homc <= (curr_hets + 2) * (curr_hets + 1)) {
+      return 0;
+    }
+  }
+  const double common_lnprob_component = Lfact(sample_ctd) + Lfact(rare_ctd) + Lfact(allele_ctd - rare_ctd) - Lfact(allele_ctd);
+  const double starting_lnprob_other_component = curr_hets * kLn2 - Lfact(curr_hets) - Lfact(curr_homr) - Lfact(curr_homc);
+  const double starting_lnprob = common_lnprob_component + starting_lnprob_other_component;
+  double lastp = 1;
+  if (starting_lnprob > -10 * kLn2) {
+    double centerp = 0;
+    // rescale this at the end.
+    while (curr_homr > 0.5) {
+      curr_hets += 2;
+      lastp *= 4 * curr_homr * curr_homc / (curr_hets * (curr_hets - 1));
+      curr_homr -= 1;
+      curr_homc -= 1;
+      if (lastp < 1 + kSmallEpsilon) {
+        tie_ct += (lastp > 1 - kSmallEpsilon);
         break;
       }
-      centerp += lastp2;
-      if (centerp > DBL_MAX) {
-        return 0;
-      }
+      centerp += lastp;
     }
-    if ((centerp == 0) && (!midp)) {
-      return 1;
+    if (midp) {
+      centerp += S_CAST(double, tie_ct) * 0.5;
+    }
+    const double starting_prob = exp(starting_lnprob);
+    centerp *= starting_prob;
+    return log(1 - centerp);
+  }
+  double tailp = 1;
+  while (curr_hets > 1.5) {
+    curr_homr += 1;
+    curr_homc += 1;
+    lastp *= curr_hets * (curr_hets - 1) / (4 * curr_homr * curr_homc);
+    curr_hets -= 2;
+    const double preaddp = tailp;
+    tailp += lastp;
+    if (tailp <= preaddp) {
+      break;
     }
-    while (curr_homr_t2 > 0.5) {
-      curr_hets_t2 += 2;
-      lastp2 *= (4 * curr_homr_t2 * curr_homc_t2) / (curr_hets_t2 * (curr_hets_t2 - 1));
-      curr_homr_t2 -= 1;
-      curr_homc_t2 -= 1;
-      const double preaddp = tailp;
-      tailp += lastp2;
-      if (tailp <= preaddp) {
-        break;
+  }
+  // Jump to other tail.
+  {
+    const double delta = modal_nhet - 0.5 * (curr_hets + S_CAST(double, obs_hets));
+    curr_homr = 0.5 * (curr_homr + S_CAST(double, obs_homr)) - delta;
+    // Round and clamp.
+    curr_homr = S_CAST(double, S_CAST(int32_t, curr_homr + 0.5));
+    if (curr_homr < 0) {
+      curr_homr = 0;
+    }
+  }
+  const double max_diff = starting_lnprob_other_component * (kSmallEpsilon / 8);
+  const double max_homr = S_CAST(double, rare_ct / 2);
+  while (1) {
+    curr_hets = rare_ctd - curr_homr * 2;
+    curr_homc = curr_homr + c_minus_r;
+    const double lnprob_other_component = curr_hets * kLn2 - Lfact(curr_hets) - Lfact(curr_homr) - Lfact(curr_homc);
+    const double lnprob_diff = lnprob_other_component - starting_lnprob_other_component;
+    if (lnprob_diff > max_diff) {
+      if (curr_homr <= 0) {
+        // All terms on this tail are larger than the starting term.  Exit.
+        if (midp) {
+          tailp -= 0.5;
+        }
+        return starting_lnprob + log(tailp);
+      }
+      const double ll_deriv = log(4 * curr_homr * curr_homc / ((curr_hets + 2) * (curr_hets + 1)));
+      curr_homr += S_CAST(int64_t, (1 - kSmallEpsilon) * lnprob_diff / ll_deriv) - 1;
+      if (curr_homr < 0) {
+        curr_homr = 0;
+      }
+    } else if (lnprob_diff > -62 * kLn2) {
+      lastp = exp(lnprob_diff);
+      break;
+    } else {
+      const double ll_deriv = log(4 * (curr_homr + 1) * (curr_homc + 1) / (curr_hets * (curr_hets - 1)));
+      curr_homr += S_CAST(int64_t, lnprob_diff / ll_deriv);
+      if (curr_homr > max_homr) {
+        curr_homr = max_homr;
       }
     }
-    double curr_hets_t1 = obs_hets;
-    double curr_homr_t1 = obs_homr;
-    double curr_homc_t1 = obs_homc;
-    while (curr_hets_t1 > 1.5) {
-      curr_homr_t1 += 1;
-      curr_homc_t1 += 1;
-      lastp1 *= (curr_hets_t1 * (curr_hets_t1 - 1)) / (4 * curr_homr_t1 * curr_homc_t1);
-      const double preaddp = tailp;
-      tailp += lastp1;
-      if (tailp <= preaddp) {
+  }
+  // Sum toward center, until lastp >= 1.
+  double lastp_tail = lastp;
+  if (lastp < 1 - kSmallEpsilon) {
+    double curr_homr_center = curr_homr;
+    double curr_homc_center = curr_homc;
+    double curr_hets_center = curr_hets;
+    while (1) {
+      tailp += lastp;
+      curr_homr_center += 1;
+      curr_homc_center += 1;
+      lastp *= curr_hets_center * (curr_hets_center - 1) / (4 * curr_homr_center * curr_homc_center);
+      if (lastp >= 1 - kSmallEpsilon) {
         break;
       }
-      curr_hets_t1 -= 2;
+      curr_hets_center -= 2;
     }
   }
-  if (!midp) {
-    return tailp / (tailp + centerp);
+  if (lastp < 1 + kSmallEpsilon) {
+    tailp += lastp;
+    ++tie_ct;
   }
-  return (tailp - ((1 - kSmallEpsilon) * kExactTestBias * 0.5) * tie_ct) / (tailp + centerp);
+  // Sum away from center, until sums stop changing.
+  while (1) {
+    curr_hets += 2;
+    lastp_tail *= 4 * curr_homr * curr_homc / (curr_hets * (curr_hets - 1));
+    const double preaddp = tailp;
+    tailp += lastp_tail;
+    if (tailp <= preaddp) {
+      break;
+    }
+    curr_homr -= 1;
+    curr_homc -= 1;
+  }
+  if (midp) {
+    tailp -= S_CAST(double, tie_ct) * 0.5;
+  }
+  return starting_lnprob + log(tailp);
 }
 
-// don't see any point to supporting long double range here
 uint32_t HweThresh(int32_t obs_hets, int32_t obs_hom1, int32_t obs_hom2, double thresh) {
   // Threshold-test-only version of HweP() which is usually able to exit
   // from the calculation earlier.  Returns 0 if these counts are close enough
@@ -2307,6 +2688,267 @@ uint32_t HweThreshMidp(int32_t obs_hets, int32_t obs_hom1, int32_t obs_hom2, dou
   return 1;
 }
 
+uint32_t HweThreshLnMain(int32_t obs_hets, int32_t obs_hom1, int32_t obs_hom2, uint32_t midp, double ln_thresh) {
+  // Threshold-test-only version of HweLnP() which is usually able to exit
+  // from the calculation earlier.  Returns 0 if these counts are close enough
+  // to Hardy-Weinberg equilibrium, 1 otherwise.
+  //
+  // Assumes ln_thresh < -708.
+  //
+  // Caller is responsible for including a tolerance in ln_thresh when
+  // appropriate; the only tolerance applied by this function is in the context
+  // of other-tail tie detection.
+  intptr_t obs_homc;
+  intptr_t obs_homr;
+  if (obs_hom1 < obs_hom2) {
+    obs_homc = obs_hom2;
+    obs_homr = obs_hom1;
+  } else {
+    obs_homc = obs_hom1;
+    obs_homr = obs_hom2;
+  }
+  const int64_t rare_ct = 2LL * obs_homr + obs_hets;
+  // Change this to "rare_ct < 2" if ln_thresh restriction is being loosened
+  // (to e.g. compare results against HweThresh()).
+  if (rare_ct < 64) {
+    return 0;
+  }
+  const int64_t sample_ct = obs_hom1 + obs_hom2 + obs_hets;
+  const double rare_ctd = rare_ct;
+  const double sample_ctd = sample_ct;
+  const double allele_ctd = sample_ctd * 2;
+
+  // 1. Compute log-likelihood of test cell.  This may be high enough on its
+  //    own to immediately return 0.
+  //    If likelihood is lower than threshold / <total # of cells>, we can
+  //    immediately return 1.
+  // 2. Determine tailsum we must hit to return 0.
+  // 3. The rest follows HweLnP(), except with an extra geometric-series-based
+  //    early-exit attempt near the end.
+  const double common_lnprob_component = Lfact(sample_ctd) + Lfact(rare_ctd) + Lfact(allele_ctd - rare_ctd) - Lfact(allele_ctd);
+  double curr_hets = obs_hets;
+  double curr_homr = obs_homr;
+  double curr_homc = obs_homc;
+  const double starting_lnprob_other_component = curr_hets * kLn2 - Lfact(curr_hets) - Lfact(curr_homr) - Lfact(curr_homc);
+  const double starting_lnprob = common_lnprob_component + starting_lnprob_other_component;
+  const double midp_d = u31tod(midp);
+  if (ln_thresh <= starting_lnprob - midp_d * kLn2) {
+    return 0;
+  }
+  const double max_homr = S_CAST(double, rare_ct / 2);
+  if (ln_thresh > starting_lnprob + log(max_homr + 1 - midp_d * 0.5)) {
+    return 1;
+  }
+
+  const double maf = rare_ctd / allele_ctd;
+  const double modal_nhet = rare_ctd * (1 - maf);
+  const double c_minus_r = curr_homc - curr_homr;
+  // This should be in (0.5, 2^30].
+  const double tail_thresh = exp(ln_thresh - starting_lnprob);
+  double tailp = 1 - midp_d * 0.5;
+  double lastp = 1;
+  if (curr_hets > modal_nhet) {
+    // No center-sum (or p=1) code path, since it doesn't make sense to choose
+    // a HWE threshold that makes these relevant; we should have already
+    // returned 0.
+    // (So we can't assume obs_hets >= 2.)
+    curr_hets += 2;
+    while (curr_homr > 0.5) {
+      lastp *= (4 * curr_homr * curr_homc) / (curr_hets * (curr_hets - 1));
+      const double preaddp = tailp;
+      tailp += lastp;
+      if (tailp <= preaddp) {
+        break;
+      }
+      if (tailp >= tail_thresh) {
+        return 0;
+      }
+      curr_hets += 2;
+      curr_homr -= 1;
+      curr_homc -= 1;
+    }
+    {
+      const double delta = 0.5 * (curr_hets + S_CAST(double, obs_hets)) - modal_nhet;
+      curr_homr = 0.5 * (curr_homr + S_CAST(double, obs_homr)) + delta;
+      curr_homr = S_CAST(double, S_CAST(int32_t, curr_homr + 0.5));
+      if (curr_homr > max_homr) {
+        curr_homr = max_homr;
+      }
+    }
+    const double max_diff = starting_lnprob_other_component * (kSmallEpsilon / 8);
+    while (1) {
+      curr_hets = rare_ctd - curr_homr * 2;
+      curr_homc = curr_homr + c_minus_r;
+      const double lnprob_other_component = curr_hets * kLn2 - Lfact(curr_hets) - Lfact(curr_homr) - Lfact(curr_homc);
+      const double lnprob_diff = lnprob_other_component - starting_lnprob_other_component;
+      if (lnprob_diff > max_diff) {
+        if (curr_homr >= max_homr) {
+          return 1;
+        }
+        const double ll_deriv = log(curr_hets * (curr_hets - 1) / (4 * (curr_homr + 1) * (curr_homc + 1)));
+        curr_homr += 1 - S_CAST(int64_t, (1 - kSmallEpsilon) * lnprob_diff / ll_deriv);
+        if (curr_homr > max_homr) {
+          curr_homr = max_homr;
+        }
+      } else if (lnprob_diff > -62 * kLn2) {
+        lastp = exp(lnprob_diff);
+        break;
+      } else {
+        const double ll_deriv = log((curr_hets + 2) * (curr_hets + 1) / (4 * curr_homr * curr_homc));
+        curr_homr -= S_CAST(int64_t, lnprob_diff / ll_deriv);
+        if (curr_homr < 0) {
+          curr_homr = 0;
+        }
+      }
+    }
+    double lastp_tail = lastp;
+    if (lastp < 1 - kSmallEpsilon) {
+      double curr_homr_center = curr_homr;
+      double curr_homc_center = curr_homc;
+      double curr_hets_center = curr_hets;
+      while (1) {
+        tailp += lastp;
+        if (tailp >= tail_thresh) {
+          return 0;
+        }
+        curr_hets_center += 2;
+        lastp *= (4 * curr_homr_center * curr_homc_center) / (curr_hets_center * (curr_hets_center - 1));
+        if (lastp >= 1 - kSmallEpsilon) {
+          break;
+        }
+        curr_homr_center -= 1;
+        curr_homc_center -= 1;
+      }
+    }
+    if (lastp < 1 + kSmallEpsilon) {
+      tailp += lastp * (1 - midp_d * 0.5);
+      if (tailp >= tail_thresh) {
+        return 0;
+      }
+    }
+    // We're down to one tail that can be tightly bounded by a geometric
+    // series.  (ratio is always decreasing)
+    // c + cr + cr^2 + ... = c/(1-r)
+    curr_homr += 1;
+    curr_homc += 1;
+    const double cur_ratio = (curr_hets * (curr_hets - 1)) / (4 * curr_homr * curr_homc);
+    lastp_tail *= cur_ratio;
+    const double remaining_ceil = lastp_tail / (1 - cur_ratio);
+    if (tailp + remaining_ceil < tail_thresh) {
+      return 1;
+    }
+    while (1) {
+      const double preaddp = tailp;
+      tailp += lastp_tail;
+      if (tailp <= preaddp) {
+        return 1;
+      }
+      if (tailp >= tail_thresh) {
+        return 0;
+      }
+      curr_hets -= 2;
+      curr_homr += 1;
+      curr_homc += 1;
+      lastp_tail *= (curr_hets * (curr_hets - 1)) / (4 * curr_homr * curr_homc);
+    }
+    // unreachable
+  }
+  while (curr_hets > 1.5) {
+    curr_homr += 1;
+    curr_homc += 1;
+    lastp *= curr_hets * (curr_hets - 1) / (4 * curr_homr * curr_homc);
+    curr_hets -= 2;
+    const double preaddp = tailp;
+    tailp += lastp;
+    if (tailp <= preaddp) {
+      break;
+    }
+    if (tailp >= tail_thresh) {
+      return 0;
+    }
+  }
+  {
+    const double delta = modal_nhet - 0.5 * (curr_hets + S_CAST(double, obs_hets));
+    curr_homr = 0.5 * (curr_homr + S_CAST(double, obs_homr)) - delta;
+    curr_homr = S_CAST(double, S_CAST(int32_t, curr_homr + 0.5));
+    if (curr_homr < 0) {
+      curr_homr = 0;
+    }
+  }
+  const double max_diff = starting_lnprob_other_component * (kSmallEpsilon / 8);
+  while (1) {
+    curr_hets = rare_ctd - curr_homr * 2;
+    curr_homc = curr_homr + c_minus_r;
+    const double lnprob_other_component = curr_hets * kLn2 - Lfact(curr_hets) - Lfact(curr_homr) - Lfact(curr_homc);
+    const double lnprob_diff = lnprob_other_component - starting_lnprob_other_component;
+    if (lnprob_diff > max_diff) {
+      if (curr_homr <= 0) {
+        return 1;
+      }
+      const double ll_deriv = log(4 * curr_homr * curr_homc / ((curr_hets + 2) * (curr_hets + 1)));
+      curr_homr += S_CAST(int64_t, (1 - kSmallEpsilon) * lnprob_diff / ll_deriv) - 1;
+      if (curr_homr < 0) {
+        curr_homr = 0;
+      }
+    } else if (lnprob_diff > -62 * kLn2) {
+      lastp = exp(lnprob_diff);
+      break;
+    } else {
+      const double ll_deriv = log(4 * (curr_homr + 1) * (curr_homc + 1) / (curr_hets * (curr_hets - 1)));
+      curr_homr += S_CAST(int64_t, lnprob_diff / ll_deriv);
+      if (curr_homr > max_homr) {
+        curr_homr = max_homr;
+      }
+    }
+  }
+  double lastp_tail = lastp;
+  if (lastp < 1 - kSmallEpsilon) {
+    double curr_homr_center = curr_homr;
+    double curr_homc_center = curr_homc;
+    double curr_hets_center = curr_hets;
+    while (1) {
+      tailp += lastp;
+      if (tailp >= tail_thresh) {
+        return 0;
+      }
+      curr_homr_center += 1;
+      curr_homc_center += 1;
+      lastp *= curr_hets_center * (curr_hets_center - 1) / (4 * curr_homr_center * curr_homc_center);
+      if (lastp >= 1 - kSmallEpsilon) {
+        break;
+      }
+      curr_hets_center -= 2;
+    }
+  }
+  if (lastp < 1 + kSmallEpsilon) {
+    tailp += lastp * (1 - midp_d * 0.5);
+    if (tailp >= tail_thresh) {
+      return 0;
+    }
+  }
+  curr_hets += 2;
+  const double cur_ratio = 4 * curr_homr * curr_homc / (curr_hets * (curr_hets - 1));
+  lastp_tail *= cur_ratio;
+  const double remaining_ceil = lastp_tail / (1 - cur_ratio);
+  if (tailp + remaining_ceil < tail_thresh) {
+    return 1;
+  }
+  while (1) {
+    const double preaddp = tailp;
+    tailp += lastp_tail;
+    if (tailp <= preaddp) {
+      return 1;
+    }
+    if (tailp >= tail_thresh) {
+      return 0;
+    }
+    curr_homr -= 1;
+    curr_homc -= 1;
+    curr_hets += 2;
+    lastp_tail *= 4 * curr_homr * curr_homc / (curr_hets * (curr_hets - 1));
+  }
+}
+
 // 2^{-40} for now, since 2^{-44} was too small on real data
 static const double kExactTestEpsilon2 = 0.0000000000009094947017729282379150390625;
 
@@ -2685,12 +3327,13 @@ uint32_t HweFirstRow(double hetab, double homa, double homb, double* tailp_ptr,
   return 0;
 }
 
+// HweXchrP() is licensed as GPL 2+.
 double HweXchrP(int32_t female_hets, int32_t female_hom1, int32_t female_hom2, int32_t male1, int32_t male2, uint32_t midp) {
   // See Graffelman J, Weir BS (2016) Testing for Hardy-Weinberg equilibrium at
   // biallelic genetic markers on the X chromosome.
   // Evaluation strategy is similar to fisher23().
   if ((!male1) && (!male2)) {
-    return HweP(female_hets, female_hom1, female_hom2, midp);
+    return exp(HweLnP(female_hets, female_hom1, female_hom2, midp));
   }
   // 1. Determine relative tail vs. center masses for the male1/male2-unchanged
   //    slice.
@@ -2855,6 +3498,482 @@ double HweXchrP(int32_t female_hets, int32_t female_hom1, int32_t female_hom2, i
   return (tailp - ((1 - kExactTestEpsilon2) * kExactTestBias * 0.5) * S_CAST(int32_t, tie_ct)) / (tailp + centerp);
 }
 
+/*
+void HweLnFirstRow(double hetab, double homa, double homb, double* tailp_ptr, double* starting_lnprob_ptr, uint32_t* tie_ct_ptr, double* orig_base_lnprobl_ptr, double* orig_base_lnprobr_ptr, double* orig_saved_lhets_ptr, double* orig_saved_lhoma_ptr, double* orig_saved_lhomb_ptr, double* orig_saved_rhets_ptr, double* orig_saved_rhoma_ptr, double* orig_saved_rhomb_ptr) {
+  const double sample_ctd = hetab + homa + homb;
+  const double a_ctd = hetab + 2 * homa;
+  const double b_ctd = hetab + 2 * homb;
+  const double allele_ctd = sample_ctd * 2;
+  const double common_lnprob_component = Lfact(sample_ctd) + Lfact(a_ctd) + Lfact(b_ctd) - Lfact(allele_ctd);
+  const double starting_lnprob_other_component = hetab * kLn2 - Lfact(hetab) - Lfact(homa) - Lfact(homb);
+  const double starting_lnprob = common_lnprob_component + starting_lnprob_other_component;
+  *starting_lnprob_ptr = starting_lnprob;
+  double lastp = 1;
+  uint32_t tie_ct = 1;
+  double tmp_hets = hetab;
+  if (hetab * hetab > 4 * homa * homb) {
+    // Incrementing hetab decreases likelihood from this point on.
+    *orig_base_lnprobr_ptr = 0;
+    *orig_saved_rhets_ptr = hetab;
+    *orig_saved_rhoma_ptr = homa;
+    *orig_saved_rhomb_ptr = homb;
+    if (starting_lnprob > -10 * kLn2) {
+      // Sum center instead of tail.
+      double centerp = 0;
+      double tmp_homa = homa;
+      double tmp_homb = homb;
+      while (tmp_hets > 1.5) {
+        tmp_homa += 1;
+        tmp_homb += 1;
+        lastp *= (tmp_hets * (tmp_hets - 1)) / (4 * tmp_homa * tmp_homb);
+        tmp_hets -= 2;
+        if (lastp < 1 + kSmallEpsilon) {
+          tie_ct += (lastp > 1 - kSmallEpsilon);
+          break;
+        }
+        centerp += lastp;
+      }
+      *tie_ct_ptr = tie_ct;
+      *orig_saved_lhets_ptr = tmp_hets;
+      *orig_saved_lhoma_ptr = tmp_homa;
+      *orig_saved_lhomb_ptr = tmp_homb;
+      *orig_base_lnprobl_ptr = log(lastp);
+      *tailp_ptr = (1 - centerp) * exp(-starting_lnprob);
+      return;
+    }
+    double* orig_saved_lhomr_ptr = orig_saved_lhoma_ptr;
+    double* orig_saved_lhomc_ptr = orig_saved_lhomb_ptr;
+    double rare_ctd = a_ctd;
+    double c_minus_r = b_ctd - a_ctd;
+    double tmp_homr = homa;
+    double tmp_homc = homb;
+    if (c_minus_r < 0.0) {
+      orig_saved_lhomr_ptr = orig_saved_lhomb_ptr;
+      orig_saved_lhomc_ptr = orig_saved_lhoma_ptr;
+      rare_ctd = b_ctd;
+      c_minus_r = -c_minus_r;
+      tmp_homr = homb;
+      tmp_homc = homa;
+    }
+    const double orig_homr = tmp_homr;
+    double tailp = 1;
+    while (tmp_homr > 0.5) {
+      tmp_hets += 2;
+      lastp *= (4 * tmp_homr * tmp_homc) / (tmp_hets * (tmp_hets - 1));
+      const double preaddp = tailp;
+      tailp += lastp;
+      if (tailp <= preaddp) {
+        break;
+      }
+      tmp_homr -= 1;
+      tmp_homc -= 1;
+    }
+    const double maf = rare_ctd / allele_ctd;
+    const double modal_nhet = rare_ctd * (1 - maf);
+    const double max_homr = S_CAST(double, S_CAST(int32_t, rare_ctd * 0.5));
+    {
+      const double delta = 0.5 * (tmp_hets + hetab) - modal_nhet;
+      // Round and clamp.
+      tmp_homr = 0.5 * (tmp_homr + orig_homr) + delta;
+      tmp_homr = S_CAST(double, S_CAST(int32_t, tmp_homr + 0.5));
+      if (tmp_homr > max_homr) {
+        tmp_homr = max_homr;
+      }
+    }
+    const double max_diff = starting_lnprob_other_component * (kSmallEpsilon / 8);
+    while (1) {
+      tmp_hets = rare_ctd - tmp_homr * 2;
+      tmp_homc = tmp_homr + c_minus_r;
+      const double lnprob_other_component = tmp_hets * kLn2 - Lfact(tmp_hets) - Lfact(tmp_homr) - Lfact(tmp_homc);
+      const double lnprob_diff = lnprob_other_component - starting_lnprob_other_component;
+      if (lnprob_diff > max_diff) {
+        if (tmp_homr >= max_homr) {
+          // All terms on this tail are larger than the starting term.  Exit.
+          assert(tmp_homr == max_homr);
+          *tie_ct_ptr = 1;
+          *orig_saved_lhets_ptr = tmp_hets;
+          *orig_saved_lhomr_ptr = tmp_homr;
+          *orig_saved_lhomc_ptr = tmp_homc;
+          // exp(lnprob_diff) can be too large to fit in a double, so we make
+          // this return value a log-probability.
+          *orig_base_lnprobl_ptr = lnprob_diff;
+          *tailp_ptr = tailp;
+          return;
+        }
+        const double ll_deriv = log(tmp_hets * (tmp_hets - 1) / (4 * (tmp_homr + 1) * (tmp_homc + 1)));
+        // Round absolute value up, to guarantee that we make progress.
+        // (lnprob_diff is positive and ll_deriv is negative.)
+        // This may overshoot.  But the function is guaranteed to terminate
+        // because we never overshoot (and we do always make progress on each
+        // step) once we're on the other side.
+        tmp_homr += 1 - S_CAST(int64_t, (1 - kSmallEpsilon) * lnprob_diff / ll_deriv);
+        if (tmp_homr > max_homr) {
+          tmp_homr = max_homr;
+        }
+      } else if (lnprob_diff > -62 * kLn2) {
+        lastp = exp(lnprob_diff);
+        break;
+      } else {
+        const double ll_deriv = log((tmp_hets + 2) * (tmp_hets + 1) / (4 * tmp_homr * tmp_homc));
+        // Round down, to guarantee we don't overshoot.
+        // We're guaranteed to make progress, since lnprob_diff >= 62 * log(2)
+        // and sample_ct < 2^31.
+        tmp_homr -= S_CAST(int64_t, lnprob_diff / ll_deriv);
+        if (tmp_homr < 0) {
+          tmp_homr = 0;
+        }
+      }
+    }
+    // Sum toward center, until lastp >= 1.
+    double lastp_tail = lastp;
+    {
+      double tmp_homr_center = tmp_homr;
+      double tmp_homc_center = tmp_homc;
+      double tmp_hets_center = tmp_hets;
+      // TODO: make sure we don't blow past mode due to floating-point error,
+      // and add the same term twice
+      while (lastp < 1 - kSmallEpsilon) {
+        tailp += lastp;
+        tmp_hets_center += 2;
+        lastp *= (4 * tmp_homr_center * tmp_homc_center) / (tmp_hets_center * (tmp_hets_center - 1));
+        tmp_homr_center -= 1;
+        tmp_homc_center -= 1;
+      }
+      if (lastp < 1 + kSmallEpsilon) {
+        tailp += lastp;
+        ++tie_ct;
+      }
+      *tie_ct_ptr = tie_ct;
+      *orig_saved_lhets_ptr = tmp_hets_center;
+      *orig_saved_lhomr_ptr = tmp_homr_center;
+      *orig_saved_lhomc_ptr = tmp_homc_center;
+      *orig_base_lnprobl_ptr = log(lastp);
+    }
+    // Sum away from center, until sums stop changing.
+    while (1) {
+      tmp_homr += 1;
+      tmp_homc += 1;
+      lastp_tail *= (tmp_hets * (tmp_hets - 1)) / (4 * tmp_homr * tmp_homc);
+      const double preaddp = tailp;
+      tailp += lastp_tail;
+      if (tailp <= preaddp) {
+        break;
+      }
+      tmp_hets -= 2;
+    }
+    *tailp_ptr = tailp;
+    return;
+  }
+  // Decrementing hetab decreases likelihood from this point on.
+  *orig_base_lnprobl_ptr = 0;
+  *orig_saved_lhets_ptr = hetab;
+  *orig_saved_lhoma_ptr = homa;
+  *orig_saved_lhomb_ptr = homb;
+  if (starting_lnprob > -10 * kLn2) {
+    const double tmp_hets_stop = MAXV(a_ctd, b_ctd) - 1.5;
+    double centerp = 0;
+    double tmp_homa = homa;
+    double tmp_homb = homb;
+    while (tmp_hets < tmp_hets_stop) {
+      tmp_hets += 2;
+      lastp *= 4 * tmp_homa * tmp_homb / (tmp_hets * (tmp_hets - 1));
+      tmp_homa -= 1;
+      tmp_homb -= 1;
+      if (lastp < 1 + kSmallEpsilon) {
+        tie_ct += (lastp > 1 - kSmallEpsilon);
+        break;
+      }
+      centerp += lastp;
+    }
+    *tie_ct_ptr = tie_ct;
+    *orig_saved_rhets_ptr = tmp_hets;
+    *orig_saved_rhoma_ptr = tmp_homa;
+    *orig_saved_rhomb_ptr = tmp_homb;
+    *orig_base_lnprobr_ptr = log(lastp);
+    *tailp_ptr = (1 - centerp) * exp(-starting_lnprob);
+    return;
+  }
+  double* orig_saved_rhomr_ptr = orig_saved_rhoma_ptr;
+  double* orig_saved_rhomc_ptr = orig_saved_rhomb_ptr;
+  double rare_ctd = a_ctd;
+  double c_minus_r = b_ctd - a_ctd;
+  double tmp_homr = homa;
+  double tmp_homc = homb;
+  if (c_minus_r < 0.0) {
+    orig_saved_rhomr_ptr = orig_saved_rhomb_ptr;
+    orig_saved_rhomc_ptr = orig_saved_rhoma_ptr;
+    rare_ctd = b_ctd;
+    c_minus_r = -c_minus_r;
+    tmp_homr = homb;
+    tmp_homc = homa;
+  }
+  double orig_homr = tmp_homr;
+  double tailp = 1;
+  while (tmp_hets > 1.5) {
+    tmp_homr += 1;
+    tmp_homc += 1;
+    lastp *= tmp_hets * (tmp_hets - 1) / (4 * tmp_homr * tmp_homc);
+    tmp_hets -= 2;
+    const double preaddp = tailp;
+    tailp += lastp;
+    if (tailp <= preaddp) {
+      break;
+    }
+  }
+  // Jump to other tail.
+  // TODO
+  {
+    const double delta = modal_nhet - 0.5 * (tmp_hets + S_CAST(double, obs_hets));
+    tmp_homr = 0.5 * (tmp_homr + S_CAST(double, obs_homr)) - delta;
+    // Round and clamp.
+    tmp_homr = S_CAST(double, S_CAST(int32_t, tmp_homr + 0.5));
+    if (tmp_homr < 0) {
+      tmp_homr = 0;
+    }
+  }
+  const double max_diff = starting_lnprob_other_component * (kSmallEpsilon / 8);
+  const double max_homr = S_CAST(double, rare_ct / 2);
+  while (1) {
+    tmp_hets = rare_ctd - tmp_homr * 2;
+    tmp_homc = tmp_homr + c_minus_r;
+    const double lnprob_other_component = tmp_hets * kLn2 - Lfact(tmp_hets) - Lfact(tmp_homr) - Lfact(tmp_homc);
+    const double lnprob_diff = lnprob_other_component - starting_lnprob_other_component;
+    if (lnprob_diff > max_diff) {
+      if (tmp_homr <= 0) {
+        // All terms on this tail are larger than the starting term.  Exit.
+        if (midp) {
+          tailp -= 0.5;
+        }
+        return starting_lnprob + log(tailp);
+      }
+      const double ll_deriv = log(4 * tmp_homr * tmp_homc / ((tmp_hets + 2) * (tmp_hets + 1)));
+      tmp_homr += S_CAST(int64_t, (1 - kSmallEpsilon) * lnprob_diff / ll_deriv) - 1;
+      if (tmp_homr < 0) {
+        tmp_homr = 0;
+      }
+    } else if (lnprob_diff > -62 * kLn2) {
+      lastp = exp(lnprob_diff);
+      break;
+    } else {
+      const double ll_deriv = log(4 * (tmp_homr + 1) * (tmp_homc + 1) / (tmp_hets * (tmp_hets - 1)));
+      tmp_homr += S_CAST(int64_t, lnprob_diff / ll_deriv);
+      if (tmp_homr > max_homr) {
+        tmp_homr = max_homr;
+      }
+    }
+  }
+  // Sum toward center, until lastp >= 1.
+  double lastp_tail = lastp;
+  if (lastp < 1 - kSmallEpsilon) {
+    double tmp_homr_center = tmp_homr;
+    double tmp_homc_center = tmp_homc;
+    double tmp_hets_center = tmp_hets;
+    while (1) {
+      tailp += lastp;
+      tmp_homr_center += 1;
+      tmp_homc_center += 1;
+      lastp *= tmp_hets_center * (tmp_hets_center - 1) / (4 * tmp_homr_center * tmp_homc_center);
+      if (lastp >= 1 - kSmallEpsilon) {
+        break;
+      }
+      tmp_hets_center -= 2;
+    }
+  }
+  if (lastp < 1 + kSmallEpsilon) {
+    tailp += lastp;
+    ++tie_ct;
+  }
+  // Sum away from center, until sums stop changing.
+  while (1) {
+    tmp_hets += 2;
+    lastp_tail *= 4 * tmp_homr * tmp_homc / (tmp_hets * (tmp_hets - 1));
+    const double preaddp = tailp;
+    tailp += lastp_tail;
+    if (tailp <= preaddp) {
+      break;
+    }
+    tmp_homr -= 1;
+    tmp_homc -= 1;
+  }
+  if (midp) {
+    tailp -= S_CAST(double, tie_ct) * 0.5;
+  }
+  return starting_lnprob + log(tailp);
+}
+  */
+
+/*
+double HweXchrLnP(int32_t female_hets, int32_t female_hom1, int32_t female_hom2, int32_t male1, int32_t male2, uint32_t midp) {
+  // See Graffelman J, Weir BS (2016) Testing for Hardy-Weinberg equilibrium at
+  // biallelic genetic markers on the X chromosome.
+  // Evaluation strategy is similar to fisher23().
+  if ((!male1) && (!male2)) {
+    return HweLnP(female_hets, female_hom1, female_hom2, midp);
+  }
+  // 1. Determine relative tail vs. center masses for the male1/male2-unchanged
+  //    slice.
+  double cur_female_hetd = S_CAST(double, female_hets);
+  double cur_female_hom1d = S_CAST(double, female_hom1);
+  double cur_female_hom2d = S_CAST(double, female_hom2);
+  double n1 = cur_female_hetd + 2 * cur_female_hom1d;
+  double n2 = cur_female_hetd + 2 * cur_female_hom2d;
+  double tailp;
+  double starting_lnprob;
+  uint32_t tie_ct;
+  // "left" = low hets side, "right" = high hets side
+  double orig_base_probl;
+  double orig_base_probr;
+  double orig_saved_lhets;
+  double orig_saved_lhom1;
+  double orig_saved_lhom2;
+  double orig_saved_rhets;
+  double orig_saved_rhom1;
+  double orig_saved_rhom2;
+  if (HweLnFirstRow(cur_female_hetd, cur_female_hom1d, cur_female_hom2d, &tailp, &starting_lnprob, &tie_ct, &orig_base_probl, &orig_base_probr, &orig_saved_lhets, &orig_saved_lhom1, &orig_saved_lhom2, &orig_saved_rhets, &orig_saved_rhom1, &orig_saved_rhom2)) {
+    return 0.0;
+  }
+  // a "row" holds male1/male2 constant.
+  const double orig_row_prob = tailp + centerp;
+  n1 += male1;
+  n2 += male2;
+  for (uint32_t male1_decreasing = 0; male1_decreasing != 2; ++male1_decreasing) {
+    double cur_male1 = male1;
+    double cur_male2 = male2;
+    double row_prob = orig_row_prob;
+    double cur_lhets = orig_saved_lhets;
+    double cur_lhom1 = orig_saved_lhom1;
+    double cur_lhom2 = orig_saved_lhom2;
+    double cur_rhets = orig_saved_rhets;
+    double cur_rhom1 = orig_saved_rhom1;
+    double cur_rhom2 = orig_saved_rhom2;
+    double base_probl = orig_base_probl;
+    double base_probr = orig_base_probr;
+    uint32_t iter_ct;
+    if (male1_decreasing) {
+      iter_ct = 2 * female_hom2 + female_hets;
+      if (iter_ct > S_CAST(uint32_t, male1)) {
+        iter_ct = male1;
+      }
+    } else {
+      iter_ct = 2 * female_hom1 + female_hets;
+      if (iter_ct > S_CAST(uint32_t, male2)) {
+        iter_ct = male2;
+      }
+    }
+    for (uint32_t iter_idx = 0; iter_idx != iter_ct; ++iter_idx) {
+      if (male1_decreasing) {
+        const double old_male1 = cur_male1;
+        const double old_female2 = n2 - cur_male2;
+        cur_male2 += 1;
+        cur_male1 -= 1;
+        // row likelihood is ((n1 choose male1) * (n2 choose male2)) /
+        //   ((n1 + n2) choose (male1 + male2))
+        row_prob *= (old_male1 * old_female2) / (cur_male2 * (n1 - cur_male1));
+        // bugfix (19 Apr 2017): We cannot move to the right of the mode here.
+        // Otherwise, if the mode itself is more probable than our initial
+        // table, but the table to the immediate right of the mode is not,
+        // we'll fail to count the mode.
+        // ("right" = high het count, "left" = low het count.)
+        if (cur_lhets != 0.0) {
+          cur_lhom1 += 1;
+          base_probl *= (old_male1 * cur_lhets) / (2 * cur_male2 * cur_lhom1);
+          cur_lhets -= 1;
+        } else {
+          cur_lhets += 1;
+          base_probl *= (2 * old_male1 * cur_lhom2) / (cur_male2 * cur_lhets);
+          cur_lhom2 -= 1;
+        }
+      } else {
+        const double old_male2 = cur_male2;
+        const double old_female1 = n1 - cur_male1;
+        cur_male1 += 1;
+        cur_male2 -= 1;
+        row_prob *= (old_male2 * old_female1) / (cur_male1 * (n2 - cur_male2));
+        if (cur_lhets != 0.0) {
+          cur_lhom2 += 1;
+          base_probl *= (old_male2 * cur_lhets) / (2 * cur_male1 * cur_lhom2);
+          cur_lhets -= 1;
+        } else {
+          cur_lhets += 1;
+          base_probl *= (2 * old_male2 * cur_lhom1) / (cur_male1 * cur_lhets);
+          cur_lhom1 -= 1;
+        }
+      }
+      double tail_incr1;
+      if (HweXchrPTailsum(0, &base_probl, &cur_lhets, &cur_lhom1, &cur_lhom2, &tie_ct, &tail_incr1)) {
+        // all tables in this row, and all subsequent rows, are less probable
+        // than the initial table.
+        double cur_female1 = n1 - cur_male1;
+        double cur_female2 = n2 - cur_male2;
+        if (male1_decreasing) {
+          while (1) {
+            const double preaddp = tailp;
+            tailp += row_prob;
+            if (tailp == preaddp) {
+              break;
+            }
+            cur_male2 += 1;
+            cur_female1 += 1;
+            row_prob *= (cur_male1 * cur_female2) / (cur_male2 * cur_female1);
+            cur_male1 -= 1;
+            cur_female2 -= 1;
+          }
+        } else {
+          while (1) {
+            const double preaddp = tailp;
+            tailp += row_prob;
+            if (tailp == preaddp) {
+              break;
+            }
+            cur_male1 += 1;
+            cur_female2 += 1;
+            row_prob *= (cur_male2 * cur_female1) / (cur_male1 * cur_female2);
+            cur_male2 -= 1;
+            cur_female1 -= 1;
+          }
+        }
+        break;
+      }
+      tailp += tail_incr1;
+      if (male1_decreasing) {
+        const double old_male1 = cur_male1 + 1;
+        if (cur_rhom2 != 0.0) {
+          cur_rhets += 1;
+          base_probr *= (2 * old_male1 * cur_rhom2) / (cur_male2 * cur_rhets);
+          cur_rhom2 -= 1;
+        } else {
+          cur_rhom1 += 1;
+          base_probr *= (old_male1 * cur_rhets) / (2 * cur_male2 * cur_rhom1);
+          cur_rhets -= 1;
+        }
+      } else {
+        const double old_male2 = cur_male2 + 1;
+        if (cur_rhom1 != 0.0) {
+          cur_rhets += 1;
+          base_probr *= (2 * old_male2 * cur_rhom1) / (cur_male1 * cur_rhets);
+          cur_rhom1 -= 1;
+        } else {
+          cur_rhom2 += 1;
+          base_probr *= (old_male2 * cur_rhets) / (2 * cur_male1 * cur_rhom2);
+          cur_rhets -= 1;
+        }
+      }
+      double tail_incr2 = 0.0;  // maybe-uninitialized warning
+      HweXchrPTailsum(1, &base_probr, &cur_rhets, &cur_rhom1, &cur_rhom2, &tie_ct, &tail_incr2);
+      tailp += tail_incr2;
+      centerp += row_prob - tail_incr1 - tail_incr2;
+      if (centerp > DBL_MAX) {
+        return 0;
+      }
+    }
+  }
+  if (!midp) {
+    return tailp / (tailp + centerp);
+  }
+  return (tailp - ((1 - kExactTestEpsilon2) * kExactTestBias * 0.5) * S_CAST(int32_t, tie_ct)) / (tailp + centerp);
+}
+*/
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/2.0/include/plink2_stats.h b/2.0/include/plink2_stats.h
index 1d0c692d..4c3a4c53 100644
--- a/2.0/include/plink2_stats.h
+++ b/2.0/include/plink2_stats.h
@@ -58,17 +58,42 @@ HEADER_INLINE double ZscoreToLnP(double zz) {
   return ChisqToLnP(zz * zz, 1);
 }
 
-double HweP(int32_t obs_hets, int32_t obs_hom1, int32_t obs_hom2, uint32_t midp);
+// Assumes xx is a nonnegative integer.
+double Lfact(double xx);
 
-// returns 0 if close enough to Hardy-Weinberg equilibrium
+// HweP() has been replaced by HweLnP().  HweThresh() and HweThreshMidp() have
+// been replaced by HweThreshLn().
+
+double HweLnP(int32_t obs_hets, int32_t obs_hom1, int32_t obs_hom2, uint32_t midp);
+
+// these return 0 if close enough to Hardy-Weinberg equilibrium
 uint32_t HweThresh(int32_t obs_hets, int32_t obs_hom1, int32_t obs_hom2, double thresh);
 
 uint32_t HweThreshMidp(int32_t obs_hets, int32_t obs_hom1, int32_t obs_hom2, double thresh);
 
+uint32_t HweThreshLnMain(int32_t obs_hets, int32_t obs_hom1, int32_t obs_hom2, uint32_t midp, double ln_thresh);
+
+HEADER_INLINE uint32_t HweThreshLn(int32_t obs_hets, int32_t obs_hom1, int32_t obs_hom2, uint32_t midp, double thresh, double ln_thresh) {
+  // kLnNormalMin = -708.3964185...
+  if (ln_thresh > -708.396) {
+    if (!midp) {
+      return HweThresh(obs_hets, obs_hom1, obs_hom2, thresh);
+    } else {
+      return HweThreshMidp(obs_hets, obs_hom1, obs_hom2, thresh);
+    }
+  }
+  return HweThreshLnMain(obs_hets, obs_hom1, obs_hom2, midp, ln_thresh);
+}
+
 double FisherExact2x2P(uint32_t m11, uint32_t m12, uint32_t m21, uint32_t m22, uint32_t midp);
 
 double HweXchrP(int32_t female_hets, int32_t female_hom1, int32_t female_hom2, int32_t male1, int32_t male2, uint32_t midp);
 
+HEADER_INLINE double HweXchrLnP(int32_t female_hets, int32_t female_hom1, int32_t female_hom2, int32_t male1, int32_t male2, uint32_t midp) {
+  const double pval = HweXchrP(female_hets, female_hom1, female_hom2, male1, male2, midp);
+  return (pval == 0)? -750 : log(pval);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/2.0/plink2.cc b/2.0/plink2.cc
index cf7b5d47..7d9f818d 100644
--- a/2.0/plink2.cc
+++ b/2.0/plink2.cc
@@ -72,7 +72,7 @@ static const char ver_str[] = "PLINK v2.00a6"
 #elif defined(USE_AOCL)
   " AMD"
 #endif
-  " (5 Jan 2024)";
+  " (3 Feb 2024)";
 static const char ver_str2[] =
   // include leading space if day < 10, so character length stays the same
   " "
@@ -367,7 +367,7 @@ typedef struct Plink2CmdlineStruct {
   double vif_thresh;
   double mind_thresh;
   double geno_thresh;
-  double hwe_thresh;
+  double hwe_ln_thresh;
   double mach_r2_min;
   double mach_r2_max;
   double minimac3_r2_min;
@@ -472,12 +472,12 @@ typedef struct Plink2CmdlineStruct {
 } Plink2Cmdline;
 
 // er, probably time to just always initialize this...
-uint32_t SingleVariantLoaderIsNeeded(const char* king_cutoff_fprefix, Command1Flags command_flags1, MakePlink2Flags make_plink2_flags, RmDupMode rmdup_mode, double hwe_thresh) {
+uint32_t SingleVariantLoaderIsNeeded(const char* king_cutoff_fprefix, Command1Flags command_flags1, MakePlink2Flags make_plink2_flags, RmDupMode rmdup_mode, double hwe_ln_thresh) {
   return (command_flags1 & (kfCommand1Exportf | kfCommand1MakeKing | kfCommand1GenoCounts | kfCommand1LdPrune | kfCommand1Validate | kfCommand1Pca | kfCommand1MakeRel | kfCommand1Glm | kfCommand1Score | kfCommand1Ld | kfCommand1Hardy | kfCommand1Sdiff | kfCommand1PgenDiff | kfCommand1Clump | kfCommand1Vcor)) ||
     ((command_flags1 & kfCommand1MakePlink2) && (make_plink2_flags & kfMakePgen)) ||
     ((command_flags1 & kfCommand1KingCutoff) && (!king_cutoff_fprefix)) ||
     (rmdup_mode != kRmDup0) ||
-    (hwe_thresh != 0.0);
+    (hwe_ln_thresh != -DBL_MAX);
 }
 
 
@@ -578,15 +578,15 @@ uint32_t VariantMissingDosageCtsAreNeeded(Command1Flags command_flags1, MiscFlag
 
 // can simplify --geno-counts all-biallelic case, but let's first make sure the
 // general case works for multiallelic variants
-uint32_t RawGenoCtsAreNeeded(Command1Flags command_flags1, MiscFlags misc_flags, double hwe_thresh) {
+uint32_t RawGenoCtsAreNeeded(Command1Flags command_flags1, MiscFlags misc_flags, double hwe_ln_thresh) {
   // Keep this in sync with --error-on-freq-calc.
   return (command_flags1 & kfCommand1GenoCounts) ||
-    ((misc_flags & kfMiscNonfounders) && ((command_flags1 & kfCommand1Hardy) || (hwe_thresh != 0.0)));
+    ((misc_flags & kfMiscNonfounders) && ((command_flags1 & kfCommand1Hardy) || (hwe_ln_thresh != -DBL_MAX)));
 }
 
-uint32_t FounderRawGenoCtsAreNeeded(Command1Flags command_flags1, MiscFlags misc_flags, double hwe_thresh) {
+uint32_t FounderRawGenoCtsAreNeeded(Command1Flags command_flags1, MiscFlags misc_flags, double hwe_ln_thresh) {
   // Keep this in sync with --error-on-freq-calc.
-  return (!(misc_flags & kfMiscNonfounders)) && ((command_flags1 & kfCommand1Hardy) || (hwe_thresh != 0.0));
+  return (!(misc_flags & kfMiscNonfounders)) && ((command_flags1 & kfCommand1Hardy) || (hwe_ln_thresh != -DBL_MAX));
 }
 
 uint32_t InfoReloadIsNeeded(Command1Flags command_flags1, PvarPsamFlags pvar_psam_flags, ExportfFlags exportf_flags, RmDupMode rmdup_mode) {
@@ -1090,7 +1090,7 @@ PglErr Plink2Core(const Plink2Cmdline* pcp, MakePlink2Flags make_plink2_flags, c
 
         pgfi.gflags &= ~kfPgenGlobalAllNonref;
       }
-      if (SingleVariantLoaderIsNeeded(king_cutoff_fprefix, pcp->command_flags1, make_plink2_flags, pcp->rmdup_mode, pcp->hwe_thresh)) {
+      if (SingleVariantLoaderIsNeeded(king_cutoff_fprefix, pcp->command_flags1, make_plink2_flags, pcp->rmdup_mode, pcp->hwe_ln_thresh)) {
         // ugly kludge, probably want to add pgenlib_internal support for this
         // hybrid use pattern
         FILE* shared_ff_copy = pgfi.shared_ff;
@@ -1906,7 +1906,7 @@ PglErr Plink2Core(const Plink2Cmdline* pcp, MakePlink2Flags make_plink2_flags, c
           x_start = cip->chr_fo_vidx_start[x_chr_fo_idx];
           const uint32_t x_end = cip->chr_fo_vidx_start[x_chr_fo_idx + 1];
           x_len = x_end - x_start;
-          if (x_len && ((pcp->command_flags1 & kfCommand1Hardy) || (pcp->hwe_thresh != 0.0)) && (!AllBitsAreZero(variant_include, x_start, x_end))) {
+          if (x_len && ((pcp->command_flags1 & kfCommand1Hardy) || (pcp->hwe_ln_thresh != -DBL_MAX)) && (!AllBitsAreZero(variant_include, x_start, x_end))) {
             if (nonfounders) {
               hwe_x_probs_needed = (sample_ct > nosex_ct);
             } else {
@@ -1979,7 +1979,7 @@ PglErr Plink2Core(const Plink2Cmdline* pcp, MakePlink2Flags make_plink2_flags, c
         // [0] = homref ct, [1] = het ref-altx total, [2] = nonref diploid
         //   total
         // use unfiltered indexes, since we remove more variants later
-        if (RawGenoCtsAreNeeded(pcp->command_flags1, pcp->misc_flags, pcp->hwe_thresh)) {
+        if (RawGenoCtsAreNeeded(pcp->command_flags1, pcp->misc_flags, pcp->hwe_ln_thresh)) {
           if (unlikely(BIGSTACK_ALLOC_STD_ARRAY(uint32_t, 3, raw_variant_ct, &raw_geno_cts))) {
             goto Plink2Core_ret_NOMEM;
           }
@@ -1996,7 +1996,7 @@ PglErr Plink2Core(const Plink2Cmdline* pcp, MakePlink2Flags make_plink2_flags, c
             }
           }
         }
-        if (FounderRawGenoCtsAreNeeded(pcp->command_flags1, pcp->misc_flags, pcp->hwe_thresh)) {
+        if (FounderRawGenoCtsAreNeeded(pcp->command_flags1, pcp->misc_flags, pcp->hwe_ln_thresh)) {
           if ((founder_ct == sample_ct) && raw_geno_cts) {
             founder_raw_geno_cts = raw_geno_cts;
             founder_x_male_geno_cts = x_male_geno_cts;
@@ -2134,14 +2134,14 @@ PglErr Plink2Core(const Plink2Cmdline* pcp, MakePlink2Flags make_plink2_flags, c
           EnforceGenoThresh(cip, (pcp->misc_flags & kfMiscGenoDosage)? variant_missing_dosage_cts : variant_missing_hc_cts, geno_hh_missing? variant_hethap_cts : nullptr, sample_ct, male_ct, geno_hh_missing? first_hap_uidx : 0x7fffffff, pcp->geno_thresh, variant_include, &variant_ct);
         }
 
-        if ((pcp->command_flags1 & kfCommand1Hardy) || (pcp->hwe_thresh != 0.0)) {
+        if ((pcp->command_flags1 & kfCommand1Hardy) || (pcp->hwe_ln_thresh != -DBL_MAX)) {
           if (cip->haploid_mask[0] & 1) {
             if (unlikely(pcp->command_flags1 & kfCommand1Hardy)) {
               logerrputs("Error: --hardy is pointless on an all-haploid genome.\n");
               goto Plink2Core_ret_INCONSISTENT_INPUT;
             }
-            // could make hwe_thresh non-const and set it to 0.0 earlier on in
-            // this case
+            // could make hwe_ln_thresh non-const and set it to -DBL_MAX
+            // earlier on in this case
             logerrputs("Warning: --hwe has no effect since entire genome is haploid.\n");
           } else {
             STD_ARRAY_PTR_DECL(uint32_t, 3, hwe_geno_cts) = nonfounders? raw_geno_cts : founder_raw_geno_cts;
@@ -2206,14 +2206,14 @@ PglErr Plink2Core(const Plink2Cmdline* pcp, MakePlink2Flags make_plink2_flags, c
               }
             }
 
-            double* hwe_x_pvals = nullptr;
-            if (hwe_x_ct && ((pcp->hwe_thresh != 0.0) || (pcp->hardy_flags & kfHardyColP))) {
+            double* hwe_x_ln_pvals = nullptr;
+            if (hwe_x_ct && ((pcp->hwe_ln_thresh != -DBL_MAX) || (pcp->hardy_flags & kfHardyColP))) {
               // support suppression of --hardy p column (with a gigantic
               // dataset, maybe it's reasonable to stick to femalep, etc.)
               uint32_t hwe_midp;
               if (pcp->command_flags1 & kfCommand1Hardy) {
                 hwe_midp = (pcp->hardy_flags / kfHardyMidp) & 1;
-                if (pcp->hwe_thresh != 0.0) {
+                if (pcp->hwe_ln_thresh != -DBL_MAX) {
                   const uint32_t hwe_midp2 = (pcp->misc_flags / kfMiscHweMidp) & 1;
                   if (unlikely(hwe_midp != hwe_midp2)) {
                     // could support this efficiently, but why bother...
@@ -2224,13 +2224,13 @@ PglErr Plink2Core(const Plink2Cmdline* pcp, MakePlink2Flags make_plink2_flags, c
               } else {
                 hwe_midp = (pcp->misc_flags / kfMiscHweMidp) & 1;
               }
-              reterr = ComputeHweXPvals(variant_include, allele_idx_offsets, hwe_geno_cts, hwe_x_male_geno_cts, hwe_x_nosex_geno_cts, x_knownsex_xgeno_cts, x_male_xgeno_cts, x_start, hwe_x_ct, x_xallele_ct, hwe_midp, pcp->max_thread_ct, &hwe_x_pvals);
+              reterr = ComputeHweXLnPvals(variant_include, allele_idx_offsets, hwe_geno_cts, hwe_x_male_geno_cts, hwe_x_nosex_geno_cts, x_knownsex_xgeno_cts, x_male_xgeno_cts, x_start, hwe_x_ct, x_xallele_ct, hwe_midp, pcp->max_thread_ct, &hwe_x_ln_pvals);
               if (unlikely(reterr)) {
                 goto Plink2Core_ret_1;
               }
             }
             if (pcp->command_flags1 & kfCommand1Hardy) {
-              reterr = HardyReport(variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, nonref_flags, hwe_geno_cts, autosomal_xgeno_cts, hwe_x_male_geno_cts, hwe_x_nosex_geno_cts, x_knownsex_xgeno_cts, x_male_xgeno_cts, hwe_x_pvals, variant_ct, hwe_x_ct, max_allele_slen, pgfi.gflags, pcp->output_min_ln, pcp->hardy_flags, pcp->max_thread_ct, nonfounders, outname, outname_end);
+              reterr = HardyReport(variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, nonref_flags, hwe_geno_cts, autosomal_xgeno_cts, hwe_x_male_geno_cts, hwe_x_nosex_geno_cts, x_knownsex_xgeno_cts, x_male_xgeno_cts, hwe_x_ln_pvals, variant_ct, hwe_x_ct, max_allele_slen, pgfi.gflags, pcp->output_min_ln, pcp->hardy_flags, pcp->max_thread_ct, nonfounders, outname, outname_end);
               if (unlikely(reterr)) {
                 goto Plink2Core_ret_1;
               }
@@ -2238,14 +2238,14 @@ PglErr Plink2Core(const Plink2Cmdline* pcp, MakePlink2Flags make_plink2_flags, c
                 continue;
               }
             }
-            if (pcp->hwe_thresh != 0.0) {
-              // assumes no filtering between hwe_x_pvals[] computation and
+            if (pcp->hwe_ln_thresh != -DBL_MAX) {
+              // assumes no filtering between hwe_x_ln_pvals[] computation and
               // here
-              EnforceHweThresh(cip, allele_idx_offsets, hwe_geno_cts, autosomal_xgeno_cts, hwe_x_male_geno_cts, hwe_x_nosex_geno_cts, x_knownsex_xgeno_cts, x_male_xgeno_cts, hwe_x_pvals, pcp->misc_flags, pcp->hwe_thresh, nonfounders, variant_include, &variant_ct);
+              EnforceHweThresh(cip, allele_idx_offsets, hwe_geno_cts, autosomal_xgeno_cts, hwe_x_male_geno_cts, hwe_x_nosex_geno_cts, x_knownsex_xgeno_cts, x_male_xgeno_cts, hwe_x_ln_pvals, pcp->misc_flags, pcp->hwe_ln_thresh, nonfounders, variant_include, &variant_ct);
             }
           }
         }
-        // raw_geno_cts/founder_raw_geno_cts/hwe_x_pvals no longer needed
+        // raw_geno_cts/founder_raw_geno_cts/hwe_x_ln_pvals no longer needed
         BigstackReset(bigstack_mark_geno_cts);
 
         if ((pcp->min_maf != 0.0) || (pcp->max_maf != 1.0) || pcp->min_allele_ddosage || (pcp->max_allele_ddosage != (~0LLU))) {
@@ -3746,7 +3746,7 @@ int main(int argc, char** argv) {
     pc.vif_thresh = 50.0;
     pc.mind_thresh = 1.0;
     pc.geno_thresh = 1.0;
-    pc.hwe_thresh = 0.0;
+    pc.hwe_ln_thresh = -DBL_MAX;
     pc.mach_r2_min = 0.0;
     pc.mach_r2_max = 0.0;
     pc.minimac3_r2_min = 0.0;
@@ -6352,6 +6352,8 @@ int main(int argc, char** argv) {
               pc.hardy_flags |= kfHardyZs;
             } else if (strequal_k(cur_modif, "midp", cur_modif_slen)) {
               pc.hardy_flags |= kfHardyMidp;
+            } else if (strequal_k(cur_modif, "log10", cur_modif_slen)) {
+              pc.hardy_flags |= kfHardyLog10;
             } else if (strequal_k(cur_modif, "redundant", cur_modif_slen)) {
               pc.hardy_flags |= kfHardyRedundant;
             } else if (likely(StrStartsWith(cur_modif, "cols=", cur_modif_slen))) {
@@ -6388,17 +6390,17 @@ int main(int argc, char** argv) {
             } else if (!strcmp(cur_modif, "keep-fewhet")) {
               pc.misc_flags |= kfMiscHweKeepFewhet;
             } else {
-              if (unlikely((pc.hwe_thresh != 0.0) || (!ScantokDouble(cur_modif, &pc.hwe_thresh)))) {
+              if (unlikely((pc.hwe_ln_thresh != -DBL_MAX) || (!ScantokLn(cur_modif, &pc.hwe_ln_thresh)))) {
                 logerrputs("Error: Invalid --hwe argument sequence.\n");
                 goto main_ret_INVALID_CMDLINE_A;
               }
-              if (unlikely((pc.hwe_thresh < 0.0) || (pc.hwe_thresh >= 1.0))) {
+              if (unlikely(pc.hwe_ln_thresh >= 0.0)) {
                 snprintf(g_logbuf, kLogbufSize, "Error: Invalid --hwe threshold '%s' (must be in [0, 1)).\n", cur_modif);
                 goto main_ret_INVALID_CMDLINE_WWA;
               }
             }
           }
-          if ((pc.misc_flags & kfMiscHweMidp) && (pc.hwe_thresh >= 0.5)) {
+          if ((pc.misc_flags & kfMiscHweMidp) && (pc.hwe_ln_thresh >= -kLn2)) {
             logerrputs("Error: --hwe threshold must be smaller than 0.5 when using mid-p adjustment.\n");
           }
           pc.filter_flags |= kfFilterPvarReq;
diff --git a/2.0/plink2_filter.cc b/2.0/plink2_filter.cc
index 23f6505e..2c8fa81c 100644
--- a/2.0/plink2_filter.cc
+++ b/2.0/plink2_filter.cc
@@ -3791,18 +3791,18 @@ void EnforceGenoThresh(const ChrInfo* cip, const uint32_t* variant_missing_cts,
   *variant_ct_ptr -= removed_ct;
 }
 
-void EnforceHweThresh(const ChrInfo* cip, const uintptr_t* allele_idx_offsets, const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_raw_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, autosomal_xgeno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_male_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_nosex_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_knownsex_xgeno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_male_xgeno_cts), const double* hwe_x_pvals, MiscFlags misc_flags, double hwe_thresh, uint32_t nonfounders, uintptr_t* variant_include, uint32_t* variant_ct_ptr) {
+void EnforceHweThresh(const ChrInfo* cip, const uintptr_t* allele_idx_offsets, const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_raw_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, autosomal_xgeno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_male_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_nosex_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_knownsex_xgeno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_male_xgeno_cts), const double* hwe_x_ln_pvals, MiscFlags misc_flags, double hwe_ln_thresh, uint32_t nonfounders, uintptr_t* variant_include, uint32_t* variant_ct_ptr) {
   uint32_t prefilter_variant_ct = *variant_ct_ptr;
   const uint32_t midp = (misc_flags / kfMiscHweMidp) & 1;
   const uint32_t keep_fewhet = (misc_flags / kfMiscHweKeepFewhet) & 1;
-  hwe_thresh *= 1 - kSmallEpsilon;
+  hwe_ln_thresh *= 1 + kSmallEpsilon;
+  const double hwe_thresh = exp(hwe_ln_thresh);
   uint32_t removed_ct = 0;
   uint32_t min_obs = UINT32_MAX;
   uint32_t max_obs = 0;
   uint32_t male_a1_ct = 0;
   uint32_t male_ax_ct = 0;
-  const double* hwe_x_pvals_iter = hwe_x_pvals;
-  const double hwe_thresh_recip = (1 + 4 * kSmallEpsilon) / hwe_thresh;
+  const double* hwe_x_ln_pvals_iter = hwe_x_ln_pvals;
   uintptr_t autosomal_xgeno_idx = 0;
   uintptr_t x_xgeno_idx = 0;
   uint32_t x_skip_code = UINT32_MAX;
@@ -3811,7 +3811,7 @@ void EnforceHweThresh(const ChrInfo* cip, const uintptr_t* allele_idx_offsets, c
   if (XymtExists(cip, kChrOffsetX, &x_code)) {
     const uint32_t x_chr_fo_idx = cip->chr_idx_to_foidx[x_code];
     x_start = cip->chr_fo_vidx_start[x_chr_fo_idx];
-    if (!hwe_x_pvals) {
+    if (!hwe_x_ln_pvals) {
       x_skip_code = x_code;  // only set this if we're skipping chrX
       prefilter_variant_ct -= PopcountBitRange(variant_include, x_start, cip->chr_fo_vidx_start[x_chr_fo_idx + 1]);
     }
@@ -3870,11 +3870,7 @@ void EnforceHweThresh(const ChrInfo* cip, const uintptr_t* allele_idx_offsets, c
           }
         }
         pval_computed = 1;
-        if (midp) {
-          test_failed = HweThreshMidp(het_a1_ct, hom_a1_ct, two_ax_ct, hwe_thresh);
-        } else {
-          test_failed = HweThresh(het_a1_ct, hom_a1_ct, two_ax_ct, hwe_thresh);
-        }
+        test_failed = HweThreshLn(het_a1_ct, hom_a1_ct, two_ax_ct, midp, hwe_thresh, hwe_ln_thresh);
         if (test_failed) {
           break;
         }
@@ -3906,22 +3902,21 @@ void EnforceHweThresh(const ChrInfo* cip, const uintptr_t* allele_idx_offsets, c
       const uint32_t female_obs_ct = hom_a1_ct + het_a1_ct + two_ax_ct;
       cur_obs_ct = female_obs_ct + male_a1_ct + male_ax_ct;
       pval_computed = 1;
-      double joint_pval = *hwe_x_pvals_iter++;
+      double joint_ln_pval = *hwe_x_ln_pvals_iter++;
       for (uint32_t xallele_idx = 0; ; ++xallele_idx) {
-        test_failed = (joint_pval < hwe_thresh);
+        test_failed = (joint_ln_pval < hwe_ln_thresh);
         if (test_failed && keep_fewhet && (het_a1_ct * S_CAST(uint64_t, het_a1_ct) < (4LLU * hom_a1_ct) * two_ax_ct)) {
           // female-only retest
-          if (joint_pval != 0.0) {
-            joint_pval *= hwe_thresh_recip;
+          double joint_pval;
+          if (joint_ln_pval != -750) {
+            joint_ln_pval = joint_ln_pval - hwe_ln_thresh;
+            joint_pval = exp(joint_ln_pval);
           } else {
-            // keep the variant iff female-only p-value also underflows
+            // temporary: preserve preexisting behavior
+            joint_ln_pval = -1022 * kLn2;
             joint_pval = kDblNormalMin;
           }
-          if (midp) {
-            test_failed = !HweThreshMidp(het_a1_ct, hom_a1_ct, two_ax_ct, joint_pval);
-          } else {
-            test_failed = !HweThresh(het_a1_ct, hom_a1_ct, two_ax_ct, joint_pval);
-          }
+          test_failed = !HweThreshLn(het_a1_ct, hom_a1_ct, two_ax_ct, midp, joint_pval, joint_ln_pval);
         }
         // bugfix (27 Jun 2020): don't clobber previous allele-test failure if
         // variant is multiallelic
@@ -3937,10 +3932,10 @@ void EnforceHweThresh(const ChrInfo* cip, const uintptr_t* allele_idx_offsets, c
           het_a1_ct -= cur_male_xgeno_cts[1];
         }
         two_ax_ct = female_obs_ct - hom_a1_ct - het_a1_ct;
-        joint_pval = hwe_x_pvals_iter[xallele_idx];
+        joint_ln_pval = hwe_x_ln_pvals_iter[xallele_idx];
       }
       x_xgeno_idx += xallele_ct;
-      hwe_x_pvals_iter = &(hwe_x_pvals_iter[xallele_ct]);
+      hwe_x_ln_pvals_iter = &(hwe_x_ln_pvals_iter[xallele_ct]);
     }
     if (test_failed) {
       ClearBit(variant_uidx, variant_include);
diff --git a/2.0/plink2_filter.h b/2.0/plink2_filter.h
index 5f0238a3..f9c017b2 100644
--- a/2.0/plink2_filter.h
+++ b/2.0/plink2_filter.h
@@ -102,7 +102,7 @@ PglErr MindFilter(const uint32_t* sample_missing_cts, const uint32_t* sample_het
 
 void EnforceGenoThresh(const ChrInfo* cip, const uint32_t* variant_missing_cts, const uint32_t* variant_hethap_cts, uint32_t sample_ct, uint32_t male_ct, uint32_t first_hap_uidx, double geno_thresh, uintptr_t* variant_include, uint32_t* variant_ct_ptr);
 
-void EnforceHweThresh(const ChrInfo* cip, const uintptr_t* allele_idx_offsets, const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_raw_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, autosomal_xgeno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_male_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_nosex_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_knownsex_xgeno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_male_xgeno_cts), const double* hwe_x_pvals, MiscFlags misc_flags, double hwe_thresh, uint32_t nonfounders, uintptr_t* variant_include, uint32_t* variant_ct_ptr);
+void EnforceHweThresh(const ChrInfo* cip, const uintptr_t* allele_idx_offsets, const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_raw_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, autosomal_xgeno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_male_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_nosex_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_knownsex_xgeno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_male_xgeno_cts), const double* hwe_x_ln_pvals, MiscFlags misc_flags, double hwe_ln_thresh, uint32_t nonfounders, uintptr_t* variant_include, uint32_t* variant_ct_ptr);
 
 // main loop has a bitwise Nref-or-Alt1 check
 ENUM_U31_DEF_START()
diff --git a/2.0/plink2_help.cc b/2.0/plink2_help.cc
index c52f6b11..e3918ad5 100644
--- a/2.0/plink2_help.cc
+++ b/2.0/plink2_help.cc
@@ -723,13 +723,14 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
 "    The default is chrom,maybeprovref,nmiss,nobs,fmiss.\n\n"
               );
     HelpPrint("hardy\0", &help_ctrl, 1,
-"  --hardy ['zs'] ['midp'] ['redundant'] ['cols='<column set descriptor>]\n"
+"  --hardy ['zs'] ['midp'] ['log10'] ['redundant'] ['cols='<col set descriptor>]\n"
 "    Hardy-Weinberg exact test p-value report(s).\n"
 "    * By default, only founders are considered; change this with --nonfounders.\n"
 "    * chrX is now omitted from the main <output prefix>.hardy report.  Instead,\n"
 "      (if present) it gets its own <output prefix>.hardy.x report based on the\n"
 "      method described in Graffelman J, Weir BS (2016) Hardy-Weinberg\n"
 "      equilibrium and the X chromosome.\n"
+"    * The 'log10' modifier causes p-values to be reported in -log10(p) form.\n"
 "    * For variants with k alleles where k>2, k separate 'biallelic' tests are\n"
 "      performed, each reported on its own line.  However, biallelic variants\n"
 "      are normally reported on a single line, since the counts/frequencies\n"
@@ -757,7 +758,7 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
 "      hetfreq: Observed and expected het-A1 frequencies.\n"
 "      sexaf: Female and male A1 observed allele frequencies (chrX only).\n"
 "      femalep: Female-only p/midp-value (chrX only).\n"
-"      p: Hardy-Weinberg equilibrium exact test p/midp-value.\n"
+"      p: Hardy-Weinberg equilibrium exact test p/midp-value (or -log10(p)).\n"
 "    The default is chrom,maybeprovref,ax,gcounts,hetfreq,sexaf,p.\n\n"
                );
     HelpPrint("het\0", &help_ctrl, 1,
diff --git a/2.0/plink2_import.cc b/2.0/plink2_import.cc
index 28948abd..a45e9090 100644
--- a/2.0/plink2_import.cc
+++ b/2.0/plink2_import.cc
@@ -1866,6 +1866,8 @@ VcfParseErr VcfConvertUnphasedMultiallelicLine(const VcfImportBaseContext* vibcp
                   }
                 }
               } else {
+                // don't check halfcall_mode == kVcfHalfCallMissing yet because
+                // we want error_on_polyploid to work
                 cur_geno = ctow(second_allele_first_char) - 48;
                 if (unlikely(cur_geno >= 10)) {
                   return kVcfParseInvalidGt;
@@ -1906,6 +1908,9 @@ VcfParseErr VcfConvertUnphasedMultiallelicLine(const VcfImportBaseContext* vibcp
                       cur_geno = 2;
                     }
                   }
+                } else {
+                  // bugfix (3 Feb 2024)
+                  cur_geno = 3;
                 }
               }
             }
@@ -2417,6 +2422,8 @@ VcfParseErr VcfConvertPhasedMultiallelicLine(const VcfImportBaseContext* vibcp,
                   }
                 }
               } else {
+                // don't check halfcall_mode == kVcfHalfCallMissing yet because
+                // we want error_on_polyploid to work
                 cur_geno = ctow(second_allele_first_char) - 48;
                 if (unlikely(cur_geno >= 10)) {
                   return kVcfParseInvalidGt;
@@ -2459,6 +2466,9 @@ VcfParseErr VcfConvertPhasedMultiallelicLine(const VcfImportBaseContext* vibcp,
                       cur_geno = 2;
                     }
                   }
+                } else {
+                  // bugfix (3 Feb 2024)
+                  cur_geno = 3;
                 }
               }
             }
diff --git a/2.0/plink2_ld.cc b/2.0/plink2_ld.cc
index f127c6e9..22179abf 100644
--- a/2.0/plink2_ld.cc
+++ b/2.0/plink2_ld.cc
@@ -5198,9 +5198,13 @@ PglErr LdConsole(const uintptr_t* variant_include, const ChrInfo* cip, const cha
         const uintptr_t* cur_genovec = pgvs[var_idx].genovec;
         STD_ARRAY_DECL(uint32_t, 4, genocounts);
         GenoarrCountFreqsUnsafe(cur_genovec, founder_ct, genocounts);
-        double hwe_pval;
+        write_iter = strcpya_k(g_logbuf, "  ");
+        write_iter = strcpya(write_iter, ld_console_varids[var_idx]);
+        write_iter = strcpya_k(write_iter, ": ");
+        double hwe_ln_pval;
         if (!is_xs[var_idx]) {
-          hwe_pval = HweP(genocounts[1], genocounts[0], genocounts[2], hwe_midp);
+          hwe_ln_pval = HweLnP(genocounts[1], genocounts[0], genocounts[2], hwe_midp);
+          write_iter = lntoa_g(hwe_ln_pval, write_iter);
         } else {
           STD_ARRAY_DECL(uint32_t, 4, male_genocounts);
           GenoarrCountSubsetFreqs(cur_genovec, sex_male_collapsed_interleaved, founder_ct, x_male_ct, male_genocounts);
@@ -5212,9 +5216,16 @@ PglErr LdConsole(const uintptr_t* variant_include, const ChrInfo* cip, const cha
             genocounts[1] -= nosex_genocounts[1];
             genocounts[2] -= nosex_genocounts[2];
           }
-          hwe_pval = HweXchrP(genocounts[1], genocounts[0] - male_genocounts[0], genocounts[2] - male_genocounts[2], male_genocounts[0], male_genocounts[2], hwe_midp);
+          hwe_ln_pval = HweXchrLnP(genocounts[1], genocounts[0] - male_genocounts[0], genocounts[2] - male_genocounts[2], male_genocounts[0], male_genocounts[2], hwe_midp);
+          // temporary
+          if (hwe_ln_pval != -750) {
+            write_iter = lntoa_g(hwe_ln_pval, write_iter);
+          } else {
+            *write_iter++ = '0';
+          }
         }
-        logprintf("  %s: %g\n", ld_console_varids[var_idx], hwe_pval);
+        memcpy_k(write_iter, "\n", 2);
+        logputsb();
       }
     }
     logputs("\n");
diff --git a/2.0/plink2_misc.cc b/2.0/plink2_misc.cc
index 11db7d3f..dcf3a5be 100644
--- a/2.0/plink2_misc.cc
+++ b/2.0/plink2_misc.cc
@@ -4762,7 +4762,7 @@ PglErr GetMultiallelicMarginalCounts(const uintptr_t* founder_info, const uintpt
   return reterr;
 }
 
-typedef struct ComputeHweXPvalsCtxStruct {
+typedef struct ComputeHweXLnPvalsCtxStruct {
   const uintptr_t* variant_include;
   const uintptr_t* allele_idx_offsets;
   const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_raw_geno_cts);
@@ -4777,10 +4777,10 @@ typedef struct ComputeHweXPvalsCtxStruct {
   uint32_t hwe_midp;
   uint32_t hwe_x_ct;
 
-  double* hwe_x_pvals;
-} ComputeHweXPvalsCtx;
+  double* hwe_x_ln_pvals;
+} ComputeHweXLnPvalsCtx;
 
-void ComputeHweXPvalsMain(uintptr_t tidx, uintptr_t thread_ct, ComputeHweXPvalsCtx* ctx) {
+void ComputeHweXLnPvalsMain(uintptr_t tidx, uintptr_t thread_ct, ComputeHweXLnPvalsCtx* ctx) {
   const uintptr_t* variant_include = ctx->variant_include;
   const uintptr_t* allele_idx_offsets = ctx->allele_idx_offsets;
   const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_raw_geno_cts) = ctx->founder_raw_geno_cts;
@@ -4797,7 +4797,7 @@ void ComputeHweXPvalsMain(uintptr_t tidx, uintptr_t thread_ct, ComputeHweXPvalsC
   uint32_t variant_idx = (hwe_x_ct * S_CAST(uint64_t, tidx)) / thread_ct;
 
   uintptr_t xgeno_idx = ctx->extra_aidx_starts[tidx];
-  double* hwe_x_pvals_iter = &(ctx->hwe_x_pvals[variant_idx + xgeno_idx]);
+  double* hwe_x_ln_pvals_iter = &(ctx->hwe_x_ln_pvals[variant_idx + xgeno_idx]);
   uintptr_t variant_uidx_base;
   uintptr_t cur_bits;
   BitIter1Start(variant_include, ctx->variant_uidx_starts[tidx], &variant_uidx_base, &cur_bits);
@@ -4830,7 +4830,7 @@ void ComputeHweXPvalsMain(uintptr_t tidx, uintptr_t thread_ct, ComputeHweXPvalsC
       female_1copy_ct -= cur_nosex_geno_cts[1];
       female_0copy_ct -= cur_nosex_geno_cts[2];
     }
-    *hwe_x_pvals_iter++ = HweXchrP(female_1copy_ct, female_2copy_ct, female_0copy_ct, male_1copy_ct, male_0copy_ct, hwe_midp);
+    *hwe_x_ln_pvals_iter++ = HweXchrLnP(female_1copy_ct, female_2copy_ct, female_0copy_ct, male_1copy_ct, male_0copy_ct, hwe_midp);
     if (allele_idx_offsets) {
       const uint32_t allele_ct = allele_idx_offsets[variant_uidx + 1] - allele_idx_offsets[variant_uidx];
       if (allele_ct != 2) {
@@ -4847,7 +4847,7 @@ void ComputeHweXPvalsMain(uintptr_t tidx, uintptr_t thread_ct, ComputeHweXPvalsC
             male_0copy_ct = male_obs_ct - male_1copy_ct - male_hethap_ct;
           }
           female_0copy_ct = female_obs_ct - female_2copy_ct - female_1copy_ct;
-          *hwe_x_pvals_iter++ = HweXchrP(female_1copy_ct, female_2copy_ct, female_0copy_ct, male_1copy_ct, male_0copy_ct, hwe_midp);
+          *hwe_x_ln_pvals_iter++ = HweXchrLnP(female_1copy_ct, female_2copy_ct, female_0copy_ct, male_1copy_ct, male_0copy_ct, hwe_midp);
           ++xgeno_idx;
         }
       }
@@ -4868,26 +4868,26 @@ void ComputeHweXPvalsMain(uintptr_t tidx, uintptr_t thread_ct, ComputeHweXPvalsC
   }
 }
 
-THREAD_FUNC_DECL ComputeHweXPvalsThread(void* raw_arg) {
+THREAD_FUNC_DECL ComputeHweXLnPvalsThread(void* raw_arg) {
   ThreadGroupFuncArg* arg = S_CAST(ThreadGroupFuncArg*, raw_arg);
-  ComputeHweXPvalsCtx* ctx = S_CAST(ComputeHweXPvalsCtx*, arg->sharedp->context);
-  ComputeHweXPvalsMain(arg->tidx, GetThreadCt(arg->sharedp) + 1, ctx);
+  ComputeHweXLnPvalsCtx* ctx = S_CAST(ComputeHweXLnPvalsCtx*, arg->sharedp->context);
+  ComputeHweXLnPvalsMain(arg->tidx, GetThreadCt(arg->sharedp) + 1, ctx);
   THREAD_RETURN;
 }
 
-PglErr ComputeHweXPvals(const uintptr_t* variant_include, const uintptr_t* allele_idx_offsets, const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_raw_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_male_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_nosex_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_knownsex_xgeno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_male_xgeno_cts), uint32_t x_start, uint32_t hwe_x_ct, uintptr_t x_xallele_ct, uint32_t hwe_midp, uint32_t calc_thread_ct, double** hwe_x_pvals_ptr) {
+PglErr ComputeHweXLnPvals(const uintptr_t* variant_include, const uintptr_t* allele_idx_offsets, const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_raw_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_male_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_nosex_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_knownsex_xgeno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_male_xgeno_cts), uint32_t x_start, uint32_t hwe_x_ct, uintptr_t x_xallele_ct, uint32_t hwe_midp, uint32_t calc_thread_ct, double** hwe_x_ln_pvals_ptr) {
   unsigned char* bigstack_mark = g_bigstack_base;
   PglErr reterr = kPglRetSuccess;
   ThreadGroup tg;
   PreinitThreads(&tg);
-  ComputeHweXPvalsCtx ctx;
+  ComputeHweXLnPvalsCtx ctx;
   {
     assert(hwe_x_ct);
-    if (unlikely(bigstack_alloc_d(hwe_x_ct + x_xallele_ct, hwe_x_pvals_ptr))) {
-      goto ComputeHweXPvals_ret_NOMEM;
+    if (unlikely(bigstack_alloc_d(hwe_x_ct + x_xallele_ct, hwe_x_ln_pvals_ptr))) {
+      goto ComputeHweXLnPvals_ret_NOMEM;
     }
     bigstack_mark = g_bigstack_base;
-    ctx.hwe_x_pvals = *hwe_x_pvals_ptr;
+    ctx.hwe_x_ln_pvals = *hwe_x_ln_pvals_ptr;
 
     if (calc_thread_ct > hwe_x_ct) {
       calc_thread_ct = hwe_x_ct;
@@ -4895,7 +4895,7 @@ PglErr ComputeHweXPvals(const uintptr_t* variant_include, const uintptr_t* allel
     if (unlikely(SetThreadCt0(calc_thread_ct - 1, &tg) ||
                  bigstack_alloc_u32(calc_thread_ct, &ctx.variant_uidx_starts) ||
                  bigstack_alloc_w(calc_thread_ct, &ctx.extra_aidx_starts))) {
-      goto ComputeHweXPvals_ret_NOMEM;
+      goto ComputeHweXLnPvals_ret_NOMEM;
     }
     // possible todo: extra-allele-based load balancer
     ComputeUidxStartPartition(variant_include, hwe_x_ct, calc_thread_ct, x_start, ctx.variant_uidx_starts);
@@ -4922,22 +4922,22 @@ PglErr ComputeHweXPvals(const uintptr_t* variant_include, const uintptr_t* allel
     fputs("0%", stdout);
     fflush(stdout);
     if (calc_thread_ct > 1) {
-      SetThreadFuncAndData(ComputeHweXPvalsThread, &ctx, &tg);
+      SetThreadFuncAndData(ComputeHweXLnPvalsThread, &ctx, &tg);
       DeclareLastThreadBlock(&tg);
       if (unlikely(SpawnThreads(&tg))) {
-        goto ComputeHweXPvals_ret_THREAD_CREATE_FAIL;
+        goto ComputeHweXLnPvals_ret_THREAD_CREATE_FAIL;
       }
     }
-    ComputeHweXPvalsMain(calc_thread_ct - 1, calc_thread_ct, &ctx);
+    ComputeHweXLnPvalsMain(calc_thread_ct - 1, calc_thread_ct, &ctx);
     JoinThreads0(&tg);
     fputs("\b\b", stdout);
     logputs("done.\n");
   }
   while (0) {
-  ComputeHweXPvals_ret_NOMEM:
+  ComputeHweXLnPvals_ret_NOMEM:
     reterr = kPglRetNomem;
     break;
-  ComputeHweXPvals_ret_THREAD_CREATE_FAIL:
+  ComputeHweXLnPvals_ret_THREAD_CREATE_FAIL:
     reterr = kPglRetThreadCreateFail;
     break;
   }
@@ -4946,7 +4946,7 @@ PglErr ComputeHweXPvals(const uintptr_t* variant_include, const uintptr_t* allel
   return reterr;
 }
 
-PglErr HardyReport(const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* nonref_flags, const STD_ARRAY_PTR_DECL(uint32_t, 3, hwe_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, autosomal_xgeno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, hwe_x_male_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, hwe_x_nosex_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_knownsex_xgeno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_male_xgeno_cts), const double* hwe_x_pvals, uint32_t variant_ct, uint32_t hwe_x_ct, uint32_t max_allele_slen, PgenGlobalFlags gflags, double output_min_ln, HardyFlags hardy_flags, uint32_t max_thread_ct, uint32_t nonfounders, char* outname, char* outname_end) {
+PglErr HardyReport(const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* nonref_flags, const STD_ARRAY_PTR_DECL(uint32_t, 3, hwe_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, autosomal_xgeno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, hwe_x_male_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, hwe_x_nosex_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_knownsex_xgeno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_male_xgeno_cts), const double* hwe_x_ln_pvals, uint32_t variant_ct, uint32_t hwe_x_ct, uint32_t max_allele_slen, PgenGlobalFlags gflags, double output_min_ln, HardyFlags hardy_flags, uint32_t max_thread_ct, uint32_t nonfounders, char* outname, char* outname_end) {
   unsigned char* bigstack_mark = g_bigstack_base;
   char* cswritep = nullptr;
   CompressStreamState css;
@@ -5017,6 +5017,7 @@ PglErr HardyReport(const uintptr_t* variant_include, const ChrInfo* cip, const u
         }
       }
     }
+    const uint32_t report_neglog10p = (hardy_flags / kfHardyLog10) & 1;
     const uint32_t ax_col = hardy_flags & kfHardyColAx;
     const uint32_t gcounts = hardy_flags & (kfHardyColGcounts | kfHardyColGcount1col);
     const uint32_t gcount_1col = hardy_flags & kfHardyColGcount1col;
@@ -5074,11 +5075,13 @@ PglErr HardyReport(const uintptr_t* variant_include, const ChrInfo* cip, const u
       }
       if (p_col) {
         *cswritep++ = '\t';
+        if (report_neglog10p) {
+          cswritep = strcpya_k(cswritep, "NEG_LOG10_");
+        }
         if (midp) {
-          cswritep = strcpya_k(cswritep, "MIDP");
-        } else {
-          *cswritep++ = 'P';
+          cswritep = strcpya_k(cswritep, "MID");
         }
+        *cswritep++ = 'P';
       }
       AppendBinaryEoln(&cswritep);
       uintptr_t variant_uidx_base = 0;
@@ -5206,8 +5209,13 @@ PglErr HardyReport(const uintptr_t* variant_include, const ChrInfo* cip, const u
           if (p_col) {
             // possible todo: multithread this
             *cswritep++ = '\t';
-            const double hwe_p = HweP(het_a1_ct, hom_a1_ct, two_ax_ct, midp);
-            cswritep = dtoa_g(MAXV(hwe_p, output_min_p), cswritep);
+            const double hwe_ln_p = HweLnP(het_a1_ct, hom_a1_ct, two_ax_ct, midp);
+            if (report_neglog10p) {
+              const double reported_val = (-kRecipLn10) * hwe_ln_p;
+              cswritep = dtoa_g(reported_val, cswritep);
+            } else {
+              cswritep = lntoa_g(MAXV(hwe_ln_p, output_min_ln), cswritep);
+            }
           }
           AppendBinaryEoln(&cswritep);
           if (unlikely(Cswrite(&css, &cswritep))) {
@@ -5294,19 +5302,23 @@ PglErr HardyReport(const uintptr_t* variant_include, const ChrInfo* cip, const u
       const uint32_t femalep_col = hardy_flags & kfHardyColFemalep;
       if (femalep_col) {
         cswritep = strcpya_k(cswritep, "\tFEMALE_ONLY_");
+        if (report_neglog10p) {
+          cswritep = strcpya_k(cswritep, "NEG_LOG10_");
+        }
         if (midp) {
-          cswritep = strcpya_k(cswritep, "MIDP");
-        } else {
-          *cswritep++ = 'P';
+          cswritep = strcpya_k(cswritep, "MID");
         }
+        *cswritep++ = 'P';
       }
       if (p_col) {
         *cswritep++ = '\t';
+        if (report_neglog10p) {
+          cswritep = strcpya_k(cswritep, "NEG_LOG10_");
+        }
         if (midp) {
-          cswritep = strcpya_k(cswritep, "MIDP");
-        } else {
-          *cswritep++ = 'P';
+          cswritep = strcpya_k(cswritep, "MID");
         }
+        *cswritep++ = 'P';
       }
       AppendBinaryEoln(&cswritep);
       fputs("--hardy: Writing chrX results...", stdout);
@@ -5416,8 +5428,8 @@ PglErr HardyReport(const uintptr_t* variant_include, const ChrInfo* cip, const u
               male_ax_ct = male_obs_ct - male_a1_ct - male_hethap_ct;
             }
             female_two_ax_ct = female_obs_ct - female_hom_a1_ct - female_het_a1_ct;
-            // Correct to increment this before looking up hwe_x_pvals[] (and
-            // to not increment on a1_idx == 0).
+            // Correct to increment this before looking up hwe_x_ln_pvals[]
+            // (and to not increment on a1_idx == 0).
             ++xgeno_idx;
           }
           if (gcounts) {
@@ -5454,14 +5466,35 @@ PglErr HardyReport(const uintptr_t* variant_include, const ChrInfo* cip, const u
           }
           if (femalep_col) {
             *cswritep++ = '\t';
-            const double female_hwe_p = HweP(female_het_a1_ct, female_hom_a1_ct, female_two_ax_ct, midp);
-            cswritep = dtoa_g(MAXV(female_hwe_p, output_min_p), cswritep);
+            const double female_hwe_ln_p = HweLnP(female_het_a1_ct, female_hom_a1_ct, female_two_ax_ct, midp);
+            if (report_neglog10p) {
+              const double reported_val = (-kRecipLn10) * female_hwe_ln_p;
+              cswritep = dtoa_g(reported_val, cswritep);
+            } else {
+              cswritep = lntoa_g(MAXV(female_hwe_ln_p, output_min_ln), cswritep);
+            }
           }
           if (p_col) {
             *cswritep++ = '\t';
             // bugfix (27 Jun 2020): forgot to correct this for multiallelic
             // variants
-            cswritep = dtoa_g(MAXV(hwe_x_pvals[variant_idx + xgeno_idx], output_min_p), cswritep);
+
+            // temporary
+            const double ln_pval = hwe_x_ln_pvals[variant_idx + xgeno_idx];
+            if (report_neglog10p) {
+              if (ln_pval != -750) {
+                const double reported_val = (-kRecipLn10) * ln_pval;
+                cswritep = dtoa_g(reported_val, cswritep);
+              } else {
+                cswritep = strcpya_k(cswritep, "inf");
+              }
+            } else {
+              if (ln_pval != -750) {
+                cswritep = lntoa_g(MAXV(ln_pval, output_min_ln), cswritep);
+              } else {
+                cswritep = dtoa_g(output_min_p, cswritep);
+              }
+            }
           }
           AppendBinaryEoln(&cswritep);
           if (unlikely(Cswrite(&css, &cswritep))) {
diff --git a/2.0/plink2_misc.h b/2.0/plink2_misc.h
index 14b1cba1..c27bb171 100644
--- a/2.0/plink2_misc.h
+++ b/2.0/plink2_misc.h
@@ -180,22 +180,23 @@ FLAGSET_DEF_START()
   kfHardy0,
   kfHardyZs = (1 << 0),
   kfHardyMidp = (1 << 1),
-  kfHardyRedundant = (1 << 2),
-
-  kfHardyColChrom = (1 << 3),
-  kfHardyColPos = (1 << 4),
-  kfHardyColRef = (1 << 5),
-  kfHardyColAlt1 = (1 << 6),
-  kfHardyColAlt = (1 << 7),
-  kfHardyColMaybeprovref = (1 << 8),
-  kfHardyColProvref = (1 << 9),
-  kfHardyColAx = (1 << 10),
-  kfHardyColGcounts = (1 << 11),
-  kfHardyColGcount1col = (1 << 12),
-  kfHardyColHetfreq = (1 << 13),
-  kfHardyColSexaf = (1 << 14),
-  kfHardyColFemalep = (1 << 15),
-  kfHardyColP = (1 << 16),
+  kfHardyLog10 = (1 << 2),
+  kfHardyRedundant = (1 << 3),
+
+  kfHardyColChrom = (1 << 4),
+  kfHardyColPos = (1 << 5),
+  kfHardyColRef = (1 << 6),
+  kfHardyColAlt1 = (1 << 7),
+  kfHardyColAlt = (1 << 8),
+  kfHardyColMaybeprovref = (1 << 9),
+  kfHardyColProvref = (1 << 10),
+  kfHardyColAx = (1 << 11),
+  kfHardyColGcounts = (1 << 12),
+  kfHardyColGcount1col = (1 << 13),
+  kfHardyColHetfreq = (1 << 14),
+  kfHardyColSexaf = (1 << 15),
+  kfHardyColFemalep = (1 << 16),
+  kfHardyColP = (1 << 17),
   kfHardyColDefault = (kfHardyColChrom | kfHardyColMaybeprovref | kfHardyColAx | kfHardyColGcounts | kfHardyColHetfreq | kfHardyColSexaf | kfHardyColP),
   kfHardyColAll = ((kfHardyColP * 2) - kfHardyColChrom)
 FLAGSET_DEF_END(HardyFlags);
@@ -398,7 +399,7 @@ PglErr WriteMissingnessReports(const uintptr_t* sample_include, const SampleIdIn
 
 PglErr GetMultiallelicMarginalCounts(const uintptr_t* founder_info, const uintptr_t* sex_nm, const uintptr_t* sex_male, const uintptr_t* variant_include, const ChrInfo* cip, const uintptr_t* allele_idx_offsets, const STD_ARRAY_PTR_DECL(uint32_t, 3, hwe_geno_cts), uint32_t raw_sample_ct, uint32_t autosomal_variant_ct, uint32_t autosomal_xallele_ct, uint32_t hwe_x_ct, uint32_t x_xallele_ct, PgenReader* simple_pgrp, STD_ARRAY_PTR_DECL(uint32_t, 2, x_male_xgeno_cts), STD_ARRAY_PTR_DECL(uint32_t, 2, autosomal_xgeno_cts), STD_ARRAY_PTR_DECL(uint32_t, 2, x_knownsex_xgeno_cts));
 
-PglErr ComputeHweXPvals(const uintptr_t* variant_include, const uintptr_t* allele_idx_offsets, const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_raw_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_male_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_nosex_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_knownsex_xgeno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_male_xgeno_cts), uint32_t x_start, uint32_t hwe_x_ct, uintptr_t x_xallele_ct, uint32_t hwe_midp, uint32_t calc_thread_ct, double** hwe_x_pvals_ptr);
+PglErr ComputeHweXLnPvals(const uintptr_t* variant_include, const uintptr_t* allele_idx_offsets, const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_raw_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_male_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_nosex_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_knownsex_xgeno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_male_xgeno_cts), uint32_t x_start, uint32_t hwe_x_ct, uintptr_t x_xallele_ct, uint32_t hwe_midp, uint32_t calc_thread_ct, double** hwe_x_ln_pvals_ptr);
 
 PglErr HardyReport(const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* nonref_flags, const STD_ARRAY_PTR_DECL(uint32_t, 3, hwe_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, autosomal_xgeno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, hwe_x_male_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 3, hwe_x_nosex_geno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_knownsex_xgeno_cts), const STD_ARRAY_PTR_DECL(uint32_t, 2, x_male_xgeno_cts), const double* hwe_x_pvals, uint32_t variant_ct, uint32_t hwe_x_ct, uint32_t max_allele_slen, PgenGlobalFlags gflags, double output_min_ln, HardyFlags hardy_flags, uint32_t max_thread_ct, uint32_t nonfounders, char* outname, char* outname_end);