From 4df61fd6e5f9c3d31ddd42204c03fcce7076132a Mon Sep 17 00:00:00 2001 From: rxu17 <26471741+rxu17@users.noreply.github.com> Date: Thu, 2 Nov 2023 18:03:24 -0700 Subject: [PATCH] add to be class attributes --- genie_registry/maf.py | 21 +++++++++++++-------- genie_registry/vcf.py | 15 +++++++-------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/genie_registry/maf.py b/genie_registry/maf.py index 4575d189..bb1f2cd1 100644 --- a/genie_registry/maf.py +++ b/genie_registry/maf.py @@ -70,6 +70,8 @@ class maf(FileTypeFormat): _fileType = "maf" _process_kwargs = [] + _allele_cols = ["REFERENCE_ALLELE", "TUMOR_SEQ_ALLELE1", "TUMOR_SEQ_ALLELE2"] + _allowed_alleles = ["A", "T", "C", "G", "N", " ", "-"] def _validateFilename(self, filePath): """ @@ -294,21 +296,24 @@ def _validate(self, mutationDF): ) total_error.write(errors) warning.write(warnings) - - # TODO: add these lists as class attribute or global - allele_cols = ["REFERENCE_ALLELE", "TUMOR_SEQ_ALLELE1", "TUMOR_SEQ_ALLELE2"] - allowed_alleles = ['A','T','C','G','N', ' ', '-'] - for allele_col in allele_cols: + + for allele_col in self._allele_cols: if process_functions.checkColExist(mutationDF, allele_col): invalid_indices = validate.get_invalid_allele_rows( - mutationDF, allele_col, allowed_alleles = allowed_alleles, ignore_case = True + mutationDF, + allele_col, + allowed_alleles=self._allowed_alleles, + ignore_case=True, ) errors, warnings = validate.get_allele_validation_message( - invalid_indices, invalid_col = allele_col, allowed_alleles = allowed_alleles, fileformat="maf" + invalid_indices, + invalid_col=allele_col, + allowed_alleles=self._allowed_alleles, + fileformat=self._fileType, ) total_error.write(errors) warning.write(warnings) - + return total_error.getvalue(), warning.getvalue() def _cross_validate(self, mutationDF: pd.DataFrame) -> tuple: diff --git a/genie_registry/vcf.py b/genie_registry/vcf.py index 75525bd2..78406d92 100644 --- a/genie_registry/vcf.py +++ b/genie_registry/vcf.py @@ -18,6 +18,8 @@ class vcf(FileTypeFormat): _fileType = "vcf" _process_kwargs = [] + _allele_col = "REF" + _allowed_alleles = ["A", "T", "C", "G", "N"] def _validateFilename(self, filePath): basename = os.path.basename(filePath[0]) @@ -137,18 +139,15 @@ def _validate(self, vcfdf): total_error += error warning += warn - # TODO: add this as class attribute or global - allele_col = "REF" - allowed_alleles = ["A", "T", "C", "G", "N"] - if process_functions.checkColExist(vcfdf, allele_col): + if process_functions.checkColExist(vcfdf, self._allele_col): invalid_indices = validate.get_invalid_allele_rows( - vcfdf, allele_col, allowed_alleles=allowed_alleles, ignore_case=True + vcfdf, self._allele_col, allowed_alleles=self._allowed_alleles, ignore_case=True ) errors, warnings = validate.get_allele_validation_message( invalid_indices, - invalid_col=allele_col, - allowed_alleles=allowed_alleles, - fileformat="vcf", + invalid_col=self._allele_col, + allowed_alleles=self._allowed_alleles, + fileformat=self._fileType, ) total_error += errors warning += warnings