Skip to content

Commit

Permalink
in the middle of adding gzip-writing support
Browse files Browse the repository at this point in the history
  • Loading branch information
stschiff committed Nov 8, 2024
1 parent fa98d3e commit ac97fdf
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 16 deletions.
19 changes: 14 additions & 5 deletions src/Poseidon/CLI/Forge.hs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ data ForgeOptions = ForgeOptions
, _forgeIntersect :: Bool
, _forgeOutFormat :: String
, _forgeOutMode :: ForgeOutMode
, _forgeOutZip :: Bool
, _forgeOutPacPath :: FilePath
, _forgeOutPacName :: Maybe String
, _forgePackageWise :: Bool
Expand Down Expand Up @@ -108,7 +109,7 @@ runForge :: ForgeOptions -> PoseidonIO ()
runForge (
ForgeOptions genoSources
entityInputs maybeSnpFile intersect_
outFormat outMode outPathRaw maybeOutName
outFormat outMode outZip outPathRaw maybeOutName
packageWise outPlinkPopMode
outputOrdered
) = do
Expand Down Expand Up @@ -184,15 +185,23 @@ runForge (
Nothing -> snpSetMergeList snpSetList intersect_
Just _ -> SNPSetOther
-- compile genotype data structure
genotypeFileData <- case outFormat of
"EIGENSTRAT" -> return $
genotypeFileData <- case (outFormat, outZip) of
("EIGENSTRAT", False) -> return $
GenotypeEigenstrat (outName <.> ".geno") Nothing
(outName <.> ".snp") Nothing
(outName <.> ".ind") Nothing
"PLINK" -> return $
("EIGENSTRAT", True) -> return $
GenotypeEigenstrat (outName <.> ".geno.gz") Nothing
(outName <.> ".snp.gz") Nothing
(outName <.> ".ind") Nothing
("PLINK", False) -> return $
GenotypePlink (outName <.> ".bed") Nothing
(outName <.> ".bim") Nothing
(outName <.> ".fam") Nothing
("PLINK", True) -> return $
GenotypePlink (outName <.> ".bed.gz") Nothing
(outName <.> ".bim.gz") Nothing
(outName <.> ".fam") Nothing
_ -> liftIO . throwIO $
PoseidonGenericException ("Illegal outFormat " ++ outFormat ++ ". Only Outformats EIGENSTRAT or PLINK are allowed at the moment")
let genotypeData = GenotypeDataSpec genotypeFileData (Just newSNPSet)
Expand Down Expand Up @@ -270,7 +279,7 @@ runForge (
let fullSourcePath = posPacBaseDir pacSource </> path
liftIO $ checkFile fullSourcePath Nothing
liftIO $ copyFile fullSourcePath $ outPath </> path
compileGenotypeData :: FilePath -> GenotypeFileSpec -> [PoseidonPackage] -> [Int] -> PoseidonIO (VUM.IOVector Int)
compileGenotypeData :: FilePath -> GenotypeFileSpec -> [PoseidonPackage] -> [Int] -> PoseidonIO (VUM.IOVector Int)
compileGenotypeData outPath gFileSpec relevantPackages relevantIndices = do
logInfo "Compiling genotype data"
logInfo "Processing SNPs..."
Expand Down
16 changes: 10 additions & 6 deletions src/Poseidon/CLI/Genoconvert.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ module Poseidon.CLI.Genoconvert where
import Poseidon.EntityTypes (HasNameAndVersion (..))
import Poseidon.GenotypeData (GenoDataSource (..),
GenotypeDataSpec (..),
GenotypeFileSpec (..), getFormat,
GenotypeFileSpec (..),
loadGenotypeData,
printSNPCopyProgress)
import Poseidon.Janno (jannoRows2EigenstratIndEntries)
Expand Down Expand Up @@ -42,6 +42,7 @@ data GenoconvertOptions = GenoconvertOptions
, _genoconvertRemoveOld :: Bool
, _genoconvertOutPlinkPopMode :: PlinkPopNameMode
, _genoconvertOnlyLatest :: Bool
, _genoconvertOutZip :: Bool
}

runGenoconvert :: GenoconvertOptions -> PoseidonIO ()
Expand Down Expand Up @@ -73,12 +74,15 @@ convertGenoTo outFormat onlyGeno outPath removeOld outPlinkPopMode pac = do
++ ":"
-- compile file names paths
let outName = getPacName . posPacNameAndVersion $ pac
(outInd, outSnp, outGeno) <- case outFormat of
"EIGENSTRAT" -> return (outName <.> ".ind", outName <.> ".snp", outName <.> ".geno")
"PLINK" -> return (outName <.> ".fam", outName <.> ".bim", outName <.> ".bed")
_ -> liftIO . throwIO $ PoseidonGenericException ("Illegal outFormat " ++ outFormat ++ ". Only Outformats EIGENSTRAT or PLINK are allowed at the moment")
(outInd, outSnp, outGeno) <- case (outFormat, outZip) of
("EIGENSTRAT", False) -> return (outName <.> ".ind", outName <.> ".snp" , outName <.> ".geno" )
("EIGENSTRAT", True ) -> return (outName <.> ".ind", outName <.> ".snp.gz", outName <.> ".geno.gz")
("PLINK", False) -> return (outName <.> ".fam", outName <.> ".bim" , outName <.> ".bed" )
("PLINK", True ) -> return (outName <.> ".fam", outName <.> ".bim.gz", outName <.> ".bed.gz" )
_ -> liftIO . throwIO $
PoseidonGenericException ("Illegal outFormat " ++ outFormat ++ ". Only Outformats EIGENSTRAT or PLINK are allowed at the moment")
-- check if genotype data needs conversion
if getFormat (genotypeFileSpec (posPacGenotypeData pac)) == outFormat
if getFormat (genotypeFileSpec (posPacGenotypeData pac)) == (outFormat, outZip)
then logWarning "The genotype data is already in the requested format"
else do
-- create new genotype data files
Expand Down
5 changes: 0 additions & 5 deletions src/Poseidon/GenotypeData.hs
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,6 @@ data GenotypeFileSpec = GenotypeEigenstrat {
_vcfGenoFileChkSum :: Maybe String
} deriving (Show, Eq)

-- | Name the genotype file format described by a 'GenotypeFileSpec'.
--
-- Only the constructor is inspected; the field contents are ignored.
-- Yields @"EIGENSTRAT"@, @"PLINK"@ or @"VCF"@ respectively.
getFormat :: GenotypeFileSpec -> String
getFormat spec = case spec of
    -- Empty record patterns match on the constructor alone, so the
    -- equations do not depend on how many fields each constructor has.
    GenotypeEigenstrat {} -> "EIGENSTRAT"
    GenotypePlink {}      -> "PLINK"
    GenotypeVCF {}        -> "VCF"

-- | To facilitate automatic parsing of GenotypeDataSpec from JSON files
instance FromJSON GenotypeDataSpec where
parseJSON = withObject "GenotypeData" $ \v -> do
Expand Down

0 comments on commit ac97fdf

Please sign in to comment.