Skip to content

Commit

Permalink
--glm residualize determinism fix, allow double-precision
Browse files Browse the repository at this point in the history
  • Loading branch information
chrchang committed Mar 19, 2024
1 parent e2ece6f commit 3848a39
Show file tree
Hide file tree
Showing 10 changed files with 541 additions and 280 deletions.
6 changes: 3 additions & 3 deletions 2.0/plink2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ static const char ver_str[] = "PLINK v2.00a6"
#elif defined(USE_AOCL)
" AMD"
#endif
" (15 Mar 2024)";
" (18 Mar 2024)";
static const char ver_str2[] =
// include leading space if day < 10, so character length stays the same
""
Expand Down Expand Up @@ -6135,8 +6135,8 @@ int main(int argc, char** argv) {
logputs("Note: 'firth-residualize' is redundant when 'cc-residualize' is already\nspecified.\n");
pc.glm_info.flags ^= kfGlmFirthResidualize;
}
if (unlikely((pc.glm_info.flags & (kfGlmHideCovar | kfGlmSinglePrecCc)) != (kfGlmHideCovar | kfGlmSinglePrecCc))) {
logerrputs("Error: --glm 'cc-residualize'/'firth-residualize' requires 'hide-covar' and\n'single-prec-cc' to be specified as well.\n");
if (unlikely(!(pc.glm_info.flags & kfGlmHideCovar))) {
logerrputs("Error: --glm 'cc-residualize'/'firth-residualize' requires 'hide-covar' to be\nspecified as well.\n");
goto main_ret_INVALID_CMDLINE_A;
}
if (unlikely(pc.glm_info.flags & kfGlmInteraction)) {
Expand Down
4 changes: 2 additions & 2 deletions 2.0/plink2_cmdline.h
Original file line number Diff line number Diff line change
Expand Up @@ -1071,11 +1071,11 @@ HEADER_INLINE BoolErr bigstack_end_calloc_kcp(uintptr_t ct, const char*** kcp_ar
}

#ifdef __LP64__
HEADER_INLINE BoolErr bigstack_end_calloc64_w(uintptr_t ct, uintptr_t** w_arr_ptr) {
HEADER_INLINE BoolErr bigstack_end_calloc64_w(uint64_t ct, uintptr_t** w_arr_ptr) {
return bigstack_end_calloc_w(ct, w_arr_ptr);
}
#else
BoolErr bigstack_end_calloc64_w(uintptr_t ct, uintptr_t** w_arr_ptr);
BoolErr bigstack_end_calloc64_w(uint64_t ct, uintptr_t** w_arr_ptr);
#endif

HEADER_INLINE void bigstack_end_clalign() {
Expand Down
4 changes: 2 additions & 2 deletions 2.0/plink2_filter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2281,8 +2281,8 @@ PglErr ReadAlleleFreqs(const uintptr_t* variant_include, const char* const* vari
}
const uint32_t raw_variant_ctl = BitCtToWordCt(raw_variant_ct);
double* cur_allele_freqs;
uintptr_t* matched_loaded_alleles;
uintptr_t* matched_internal_alleles;
uintptr_t* matched_loaded_alleles = nullptr; // spurious g++ 4.8 warning
uintptr_t* matched_internal_alleles = nullptr;
uint32_t* loaded_to_internal_allele_idx;
uintptr_t* already_seen;
if (unlikely(bigstack_calloc_w(raw_variant_ctl, variant_afreqcalcp) ||
Expand Down
743 changes: 502 additions & 241 deletions 2.0/plink2_glm_logistic.cc

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions 2.0/plink2_glm_logistic.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,10 @@ typedef struct {
} LogisticAuxResult;

typedef struct CcResidualizeCtxStruct {
float* logistic_nm_sample_offsets;
float* firth_nm_sample_offsets;
float* logistic_nm_sample_offsets_f;
float* firth_nm_sample_offsets_f;
double* logistic_nm_sample_offsets_d;
double* firth_nm_sample_offsets_d;
uint32_t prefitted_pred_ct;
uint32_t domdev_present_p1;
uint32_t sample_ct;
Expand Down
18 changes: 9 additions & 9 deletions 2.0/plink2_help.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1310,15 +1310,15 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
" regression whenever the logistic regression fails to converge. This is\n"
" now the default.\n"
" * 'firth' requests Firth regression all the time.\n"
" * To trade off some accuracy for speed, you can use the 'single-prec-cc'\n"
" modifier to request use of single-precision instead of double-precision\n"
" floating-point numbers during logistic and Firth regression.\n"
" * If that's not enough, you can also use the 'firth-residualize' or\n"
" 'cc-residualize' modifier, which implements the shortcut described in\n"
" Mbatchou J et al. (2021) Computationally efficient whole genome\n"
" regression for quantitative and binary traits.\n"
" * These must be used with 'hide-covar' and 'single-prec-cc', and disable\n"
" some other --glm features.\n"
" * To trade off some accuracy for speed:\n"
" * You can use the 'single-prec-cc' modifier to request use of\n"
" single-precision instead of double-precision floating-point numbers\n"
" during logistic and Firth regression.\n"
" * You can use the 'firth-residualize' or 'cc-residualize' modifier, which\n"
" implements the shortcut described in Mbatchou J et al. (2021)\n"
" Computationally efficient whole genome regression for quantitative and\n"
" binary traits. (These must be used with 'hide-covar', and disable some\n"
" other --glm features.)\n"
" * To add covariates which are not constant across all variants, add the\n"
" 'local-covar=' and 'local-psam=' modifiers, use full filenames for each,\n"
" and use either 'local-pvar=' or 'local-pos-cols=' to provide variant ID\n"
Expand Down
6 changes: 3 additions & 3 deletions 2.0/plink2_import.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7598,13 +7598,13 @@ PglErr BcfToPgen(const char* bcfname, const char* preexisting_psamname, const ch
uintptr_t* bcf_contig_keep;
const char** contig_names;
uint32_t* contig_slens;
const char** contig_out_names;
uint32_t* contig_out_slens;
const char** contig_out_names = nullptr; // spurious g++ 4.8 warning
uint32_t* contig_out_slens = nullptr;
char* contig_out_buf;
const char** fif_strings;
uint32_t* fif_slens;
uintptr_t* bcf_contig_seen;
uintptr_t* sample_nypbuf;
uintptr_t* sample_nypbuf = nullptr;
if (unlikely(bigstack_calloc_w(BitCtToWordCt(contig_string_idx_end), &bcf_contig_keep) ||
bigstack_calloc_kcp(contig_string_idx_end, &contig_names) ||
bigstack_calloc_u32(contig_string_idx_end, &contig_slens) ||
Expand Down
10 changes: 5 additions & 5 deletions 2.0/plink2_ld.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1812,11 +1812,11 @@ PglErr IndepPairphase(const uintptr_t* variant_include, const ChrInfo* cip, cons
const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);
IndepPairphaseCtx ctx;
uintptr_t* genovec;
uintptr_t* phasepresent;
uintptr_t* phaseinfo;
uintptr_t* raw_genovec;
uintptr_t* raw_phasepresent;
uintptr_t* raw_phaseinfo;
uintptr_t* phasepresent = nullptr; // spurious g++ 4.8 warning
uintptr_t* phaseinfo = nullptr;
uintptr_t* raw_genovec = nullptr;
uintptr_t* raw_phasepresent = nullptr;
uintptr_t* raw_phaseinfo = nullptr;
uint32_t* thread_last_subcontig;
uint32_t* thread_subcontig_start_tvidx;
uint32_t* thread_last_tvidx;
Expand Down
22 changes: 10 additions & 12 deletions 2.0/plink2_matrix_calc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4250,20 +4250,18 @@ PglErr CalcGrm(const uintptr_t* orig_sample_include, const SampleIdInfo* siip, c
if (unlikely(SetThreadCt(calc_thread_ct, &tg))) {
goto CalcGrm_ret_NOMEM;
}
{
if (unlikely(bigstack_calloc64_d(S_CAST(uint64_t, row_end_idx - row_start_idx) * row_end_idx, &grm))) {
if (!grm_ptr) {
logerrputs("Error: Out of memory. If you are SURE you are performing the right matrix\ncomputation, you can split it into smaller pieces with --parallel, and then\nconcatenate the results. But before you try this, make sure the program you're\nproviding the matrix to can actually handle such a large input file.\n");
if (unlikely(bigstack_calloc64_d(S_CAST(uint64_t, row_end_idx - row_start_idx) * row_end_idx, &grm))) {
if (!grm_ptr) {
logerrputs("Error: Out of memory. If you are SURE you are performing the right matrix\ncomputation, you can split it into smaller pieces with --parallel, and then\nconcatenate the results. But before you try this, make sure the program you're\nproviding the matrix to can actually handle such a large input file.\n");
} else {
// Need to edit this if there are ever non-PCA ways to get here.
if (!(grm_flags & (kfGrmMatrixShapemask | kfGrmListmask | kfGrmBin))) {
logerrputs("Error: Out of memory. Consider \"--pca approx\" instead.\n");
} else {
// Need to edit this if there are ever non-PCA ways to get here.
if (!(grm_flags & (kfGrmMatrixShapemask | kfGrmListmask | kfGrmBin))) {
logerrputs("Error: Out of memory. Consider \"--pca approx\" instead.\n");
} else {
logerrputs("Error: Out of memory. Consider \"--pca approx\" (and not writing the GRM to\ndisk) instead.\n");
}
logerrputs("Error: Out of memory. Consider \"--pca approx\" (and not writing the GRM to\ndisk) instead.\n");
}
goto CalcGrm_ret_NOMEM_CUSTOM;
}
goto CalcGrm_ret_NOMEM_CUSTOM;
}
ctx.sample_ct = row_end_idx;
ctx.grm = grm;
Expand Down Expand Up @@ -5267,7 +5265,7 @@ PglErr CalcPca(const uintptr_t* sample_include, const SampleIdInfo* siip, const
const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);
uint32_t* pca_sample_include_cumulative_popcounts;
PgenVariant pgv;
double* allele_1copy_buf;
double* allele_1copy_buf = nullptr; // spurious g++ 4.8 warning
double* eigvals;
CalcPcaCtx ctx;
if (unlikely(bigstack_alloc_u32(raw_sample_ctl, &pca_sample_include_cumulative_popcounts) ||
Expand Down
2 changes: 1 addition & 1 deletion 2.0/plink2_misc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -662,7 +662,7 @@ PglErr RecoverVarIds(const char* fname, const uintptr_t* variant_include, const
uintptr_t* chr_already_seen;
uintptr_t* already_seen;
uintptr_t* conflict_bitarr;
char** orig_alt_starts;
char** orig_alt_starts = nullptr; // spurious g++ 4.8 warning
if (unlikely(bigstack_alloc_cp(raw_variant_ct, &variant_ids_copy) ||
bigstack_calloc_w(BitCtToWordCt(chr_code_end), &chr_already_seen) ||
bigstack_calloc_w(raw_variant_ctl, &already_seen) ||
Expand Down

0 comments on commit 3848a39

Please sign in to comment.