Skip to content

Commit

Permalink
--vcf-allow-no-nonvar
Browse files Browse the repository at this point in the history
  • Loading branch information
chrchang committed Aug 18, 2024
1 parent 73cc899 commit 0cbe314
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 5 deletions.
9 changes: 9 additions & 0 deletions 2.0/plink2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10981,6 +10981,15 @@ int main(int argc, char** argv) {
}
import_flags |= kfImportVcfRefNMissing;
goto main_param_zero;
} else if (strequal_k_unsafe(flagname_p2, "cf-allow-no-nonvar")) {
// Don't bother with --bcf, since GATK GenotypeGVCFs only generates
// VCFs.
if (unlikely(!(xload & kfXloadVcf))) {
logerrputs("Error: --vcf-allow-no-nonvar must be used with --vcf.\n");
goto main_ret_INVALID_CMDLINE;
}
import_flags |= kfImportVcfAllowNoNonvar;
goto main_param_zero;
} else if (strequal_k_unsafe(flagname_p2, "if")) {
if (unlikely(!(pc.command_flags1 & kfCommand1Glm))) {
logerrputs("Error: --vif must be used with --glm/--epistasis.\n");
Expand Down
28 changes: 23 additions & 5 deletions 2.0/plink2_import.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3035,6 +3035,8 @@ PglErr VcfToPgen(const char* vcfname, const char* preexisting_psamname, const ch
uint32_t max_allele_slen = 1;
uint32_t max_qualfilterinfo_slen = 6;
uint32_t phase_or_dosage_found = 0;
uint32_t not_single_sample_no_nonvar = (sample_ct != 1) || format_dosage_relevant || format_hds_search || (import_flags & kfImportVcfAllowNoNonvar);
uint32_t nonvar_nonmissing_ct = 0;

while (1) {
++line_idx;
Expand Down Expand Up @@ -3198,7 +3200,7 @@ PglErr VcfToPgen(const char* vcfname, const char* preexisting_psamname, const ch
if (unlikely(*format_end != '\t')) {
goto VcfToPgen_ret_MISSING_TOKENS;
}
if (phase_or_dosage_found) {
if (phase_or_dosage_found && not_single_sample_no_nonvar) {
goto VcfToPgen_linescan_done;
}

Expand All @@ -3222,6 +3224,8 @@ PglErr VcfToPgen(const char* vcfname, const char* preexisting_psamname, const ch

// Check if there's at least one phased het call, and/or at least one
// relevant dosage.
// If there's only one sample, maybe also check whether the VCF has any
// hom-REF calls at all.
// Don't bother multithreading this since it's trivial.
if ((vic.hds_field_idx != UINT32_MAX) || (vic.dosage_field_idx != UINT32_MAX)) {
if (alt_ct == 1) {
Expand All @@ -3239,10 +3243,21 @@ PglErr VcfToPgen(const char* vcfname, const char* preexisting_psamname, const ch
if (!vic.vibc.gt_exists) {
goto VcfToPgen_linescan_done;
}
if (alt_ct < 10) {
phase_or_dosage_found = VcfScanShortallelicLine(&(vic.vibc), format_end, &line_iter);
} else {
phase_or_dosage_found = VcfScanLongallelicLine(&(vic.vibc), format_end, &line_iter);
if (!not_single_sample_no_nonvar) {
if (format_end[1] == '0') {
const char cc = format_end[2];
not_single_sample_no_nonvar = (((cc == '/') || (cc == '|')) && (format_end[3] == '0')) || (cc == ':') || (cc == '\t');
}
// This can miss ./0 and the like, but that should practically
// never matter.
nonvar_nonmissing_ct += (format_end[1] != '.');
}
if (!phase_or_dosage_found) {
if (alt_ct < 10) {
phase_or_dosage_found = VcfScanShortallelicLine(&(vic.vibc), format_end, &line_iter);
} else {
phase_or_dosage_found = VcfScanLongallelicLine(&(vic.vibc), format_end, &line_iter);
}
}
}
VcfToPgen_linescan_done:
Expand Down Expand Up @@ -3282,6 +3297,9 @@ PglErr VcfToPgen(const char* vcfname, const char* preexisting_psamname, const ch
} else {
logprintf("--vcf: %u variant%s scanned (%" PRIuPTR " skipped).\n", variant_ct, (variant_ct == 1)? "" : "s", variant_skip_ct);
}
if ((!not_single_sample_no_nonvar) && (nonvar_nonmissing_ct >= 1000)) {
logerrputs("Warning: All genotypes contain at least one ALT allele; this implies the VCF\nwas incorrectly generated. You probably need to backtrack and e.g. rerun GATK\nGenotypeGVCFs with the --include-non-variant-sites flag added.\n");
}

if (allele_idx_end > 2 * variant_ct) {
allele_idx_offsets[variant_ct] = allele_idx_end;
Expand Down

0 comments on commit 0cbe314

Please sign in to comment.