diff --git a/out/ass.jar b/out/ass.jar new file mode 100644 index 0000000..3913ada Binary files /dev/null and b/out/ass.jar differ diff --git a/out/metacherchant.bat b/out/metacherchant.bat index 3f4060e..a823a9f 100644 Binary files a/out/metacherchant.bat and b/out/metacherchant.bat differ diff --git a/out/metacherchant.jar b/out/metacherchant.jar index c085a62..0d91e47 100644 Binary files a/out/metacherchant.jar and b/out/metacherchant.jar differ diff --git a/out/metacherchant.sh b/out/metacherchant.sh old mode 100644 new mode 100755 index 15e2ec1..edf0506 Binary files a/out/metacherchant.sh and b/out/metacherchant.sh differ diff --git a/src/algo/AssemblerCalculator.java b/src/algo/AssemblerCalculator.java new file mode 100644 index 0000000..c285462 --- /dev/null +++ b/src/algo/AssemblerCalculator.java @@ -0,0 +1,104 @@ +package algo; + +import org.apache.log4j.Logger; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; + +/** + * Created by -- on 13.02.2018. + */ +public class AssemblerCalculator implements Runnable { + private final String assembler; + private final String assemblerPath; + private final String outputPrefix; + private final int readsNumber; + private final Logger logger; + + public AssemblerCalculator(String assembler, String assemblerPath, String outputPrefix, int readsNumber, Logger logger) { + this.assembler = assembler; + this.assemblerPath = assemblerPath; + this.outputPrefix = outputPrefix; + this.readsNumber = readsNumber; + this.logger = logger; + } + + private void runAssembler() { + if (assembler.equals("spades")) { + try { + String[] commandA = {"python", assemblerPath + "/spades.py", "--12", + outputPrefix + "cutReads" + readsNumber + ".fastq", "-o", outputPrefix + "out_spades" + readsNumber}; + String[] commandR = {"mv", outputPrefix + "out_spades" + readsNumber + "/" + "contigs.fasta", + outputPrefix + "contigs" + readsNumber + ".fasta"}; + ProcessBuilder procBuilder = new ProcessBuilder(commandA); + ProcessBuilder procBuilderR = new ProcessBuilder(commandR); + + procBuilder.redirectErrorStream(true); + Process process = procBuilder.start(); + InputStream stdout = process.getInputStream(); + InputStreamReader isrStdout = new InputStreamReader(stdout); + BufferedReader brStdout = new BufferedReader(isrStdout); + String line; + while ((line = brStdout.readLine()) != null) { + logger.info(line); + } + // or better take assembly_graph.gfa? + + procBuilderR.redirectErrorStream(true); + Process processB = procBuilderR.start(); + InputStream stdoutB = processB.getInputStream(); + InputStreamReader isrStdoutB = new InputStreamReader(stdoutB); + BufferedReader brStdoutB = new BufferedReader(isrStdoutB); + String lineB; + while ((lineB = brStdoutB.readLine()) != null) { + logger.info(lineB); + } + } catch (IOException e) { + e.printStackTrace(); + logger.info(e.getMessage()); + } + } + if (assembler.equals("megahit")) { + try { + String[] commandA = {assemblerPath + "/megahit", "--12", + outputPrefix + "cutReads" + readsNumber + ".fastq", "-o", outputPrefix + "out_megahit" + readsNumber}; + String[] commandR = {"mv", outputPrefix + "out_megahit" + readsNumber + "/" + "final.contigs.fa", + outputPrefix + "contigs" + readsNumber + ".fasta"}; + ProcessBuilder procBuilder = new ProcessBuilder(commandA); + ProcessBuilder procBuilderR = new ProcessBuilder(commandR); + + procBuilder.redirectErrorStream(true); + Process process = procBuilder.start(); + InputStream stdout = process.getInputStream(); + InputStreamReader isrStdout = new InputStreamReader(stdout); + BufferedReader brStdout = new BufferedReader(isrStdout); + + String line; + while ((line = brStdout.readLine()) != null) { + logger.info(line); + } + + procBuilderR.redirectErrorStream(true); + Process processB = procBuilderR.start(); + InputStream stdoutB = processB.getInputStream(); + InputStreamReader isrStdoutB = new InputStreamReader(stdoutB); + BufferedReader brStdoutB = new BufferedReader(isrStdoutB); + String lineB; + while ((lineB = brStdoutB.readLine()) != null) { + logger.info(lineB); + } + + } catch (IOException e) { + e.printStackTrace(); + logger.info(e.getMessage()); + } + } + } + + @Override + public void run() { + runAssembler(); + } +} diff --git a/src/algo/OneSequenceCalculator.java b/src/algo/OneSequenceCalculator.java index ea3d47c..aae7a67 100644 --- a/src/algo/OneSequenceCalculator.java +++ b/src/algo/OneSequenceCalculator.java @@ -37,7 +37,7 @@ public class OneSequenceCalculator implements Runnable { private boolean fail = false; public OneSequenceCalculator(String sequence, int k, int minOccurences, String outputPrefix, HashFunction hasher, BigLong2ShortHashMap reads, Logger logger, boolean bothDirections, int chunkLength, TerminationMode termMode, boolean trimPaths) { - this.sequence = sequence.toString(); + this.sequence = sequence; this.k = k; this.outputPrefix = outputPrefix; this.minOccurences = minOccurences; @@ -100,7 +100,7 @@ private void addToSubgraph(String kmer) { subgraph.put(normalizeDna(kmer), (int) reads.get(getKmerKey(kmer))); } - private boolean isContainedInSubgraph(String kmer) { + boolean isContainedInSubgraph(String kmer) { return subgraph.containsKey(normalizeDna(kmer)); } diff --git a/src/algo/ReadsFilter.java b/src/algo/ReadsFilter.java new file mode 100644 index 0000000..a337316 --- /dev/null +++ b/src/algo/ReadsFilter.java @@ -0,0 +1,86 @@ +package algo; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; + +import org.apache.log4j.Logger; +import ru.ifmo.genetics.dna.Dna; +import ru.ifmo.genetics.dna.DnaQ; +import ru.ifmo.genetics.io.ReadersUtils; +import ru.ifmo.genetics.io.formats.QualityFormat; +import ru.ifmo.genetics.io.sources.NamedSource; +import ru.ifmo.genetics.utils.tool.ExecutionFailedException; + +public class ReadsFilter implements Runnable { + private OneSequenceCalculator calc; + private File file; + private final String outputPrefix; + private final int readsNumber; + private int k; + private int percentFiltration; + private final Logger logger; + + public ReadsFilter(File file, OneSequenceCalculator calc, String outputPrefix, int readsNumber, int k, int percentFiltration, Logger logger) { + this.file = file; + this.calc = calc; + this.outputPrefix = outputPrefix; + this.readsNumber = readsNumber; + this.k = k; + this.percentFiltration = percentFiltration; + this.logger = logger; + } + + private void reader() throws ExecutionFailedException { + File outputCutReads = new File(outputPrefix + "/cutReads" + readsNumber + ".fastq"); + outputCutReads.getParentFile().mkdirs(); + NamedSource reader = null; + try { + PrintWriter out; + out = new PrintWriter(outputCutReads); + reader = ReadersUtils.readDnaLazy(file); + ReadersUtils.loadDnaQs(file); + QualityFormat qualF = ReadersUtils.determineQualityFormat(file); + NamedSource quals = ReadersUtils.readDnaQLazy(file); + int indexCutRead = 0; + for (DnaQ qual : quals) { + String read = qual.toString(); + int len = read.length(); + int kmersInRead = len - k + 1; + int kmersFiltration = kmersInRead * percentFiltration / 100; + kmersFiltration = Math.max(1, kmersFiltration); + int numberCoverageKmers = 0; + for (int i = 0; i < len - k; i++) { + if (calc.isContainedInSubgraph(read.substring(i, i + k))) { + numberCoverageKmers++; + } + if (numberCoverageKmers >= kmersFiltration) { + indexCutRead++; + out.println("@" + indexCutRead + "\n" + read + "\n+"); + for (int j = 0; j < len; j++) { + out.print(qualF.getPhredChar(qual.phredAt(j))); + } + out.println(); + break; + } + } + } + out.close(); + } catch (IOException e) { + throw new ExecutionFailedException("Failed to read from file " + file.getPath()); + } + + } + + @Override + public void run() { + try { + reader(); + } catch (ExecutionFailedException e) { + logger.info(e.getMessage()); + } + } + +} + +//--k 3 --reads reads.fastq --seq genes.fasta --maxkmers 100 --coverage 0 -o newfile \ No newline at end of file diff --git a/src/io/writers/GFAWriter.java b/src/io/writers/GFAWriter.java index 48d1f32..62ed1be 100644 --- a/src/io/writers/GFAWriter.java +++ b/src/io/writers/GFAWriter.java @@ -59,11 +59,11 @@ private void printEdge(PrintWriter out, SingleNode first, SingleNode second) { out.print("L\t"); out.print((Math.min(first.rc.id, first.id) + 1) + (first.isGeneNode ? GENE_LABEL_SUFFIX : "")); out.print('\t'); - out.print((first.id < first.rc.id ? "+" : "-")); + out.print((first.id < first.rc.id ? "-" : "+")); out.print('\t'); out.print((Math.min(second.rc.id, second.id) + 1) + (second.isGeneNode ? GENE_LABEL_SUFFIX : "")); out.print('\t'); - out.print((second.id > second.rc.id ? "+" : "-")); + out.print((second.id > second.rc.id ? "-" : "+")); out.print('\t'); out.println((k - 1) + "M"); } diff --git a/src/tools/EnvironmentAssemblerFinder.java b/src/tools/EnvironmentAssemblerFinder.java new file mode 100644 index 0000000..7302d88 --- /dev/null +++ b/src/tools/EnvironmentAssemblerFinder.java @@ -0,0 +1,254 @@ +package tools; + +import algo.AssemblerCalculator; +import algo.OneSequenceCalculator; +import algo.ReadsFilter; +import algo.TerminationMode; +import io.IOUtils; +import io.LargeKIOUtils; +import ru.ifmo.genetics.dna.DnaQ; +import ru.ifmo.genetics.io.ReadersUtils; +import ru.ifmo.genetics.structures.map.BigLong2ShortHashMap; +import ru.ifmo.genetics.utils.tool.ExecutionFailedException; +import ru.ifmo.genetics.utils.tool.Parameter; +import ru.ifmo.genetics.utils.tool.Tool; +import ru.ifmo.genetics.utils.tool.inputParameterBuilder.*; +import utils.FNV1AHash; +import utils.HashFunction; +import utils.PolynomialHash; + +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +public class EnvironmentAssemblerFinder extends Tool{ + public static final String NAME = "environment-assembler-finder"; + public static final String DESCRIPTION = "Finds graphic environment for many genomic sequences in given metagenomic reads in 3 stages using assembler"; + + public static final int DEFAULT_MAX_THREADS = 32; + + public final Parameter k = addParameter(new IntParameterBuilder("k") + .mandatory() + .withShortOpt("k") + .withDescription("k-mer size") + .withDefaultValue(21) + .create()); + + public final Parameter readsFiles = addParameter(new FileMVParameterBuilder("reads") + .withShortOpt("i") + .withDescription("FASTQ, BINQ, FASTA reads") + .withDefaultValue(new File[]{}) + .create()); + + public final Parameter seqsFile = addParameter(new FileParameterBuilder("seq") + .mandatory() + .withDescription("FASTA file with sequences") + .create()); + + + public final Parameter outputDir = addParameter(new FileParameterBuilder("output") + .mandatory() + .withShortOpt("o") + .withDescription("output directory") + .create()); + + public final Parameter maxKmers = addParameter(new IntParameterBuilder("maxkmers") +// .withDefaultValue(100000) + .withDescription("maximum number of k-mers in created subgraph") + .create()); + + public final Parameter maxRadius = addParameter(new IntParameterBuilder("maxradius") +// .withDefaultValue(100000) + .withDescription("maximum distance in k-mers from starting gene") + .create()); + + public final Parameter minCoverage = addParameter(new IntParameterBuilder("coverage") + .withDescription("minimum depth of k-mers to consider") + .withDefaultValue(1) + .create()); + + public final Parameter bothDirections = addParameter(new BoolParameterBuilder("bothdirs") + .withDescription("run graph search in both directions from starting sequence") + .withDefaultValue(false) + .create()); + + public final Parameter chunkLength = addParameter(new IntParameterBuilder("chunklength") + .withDescription("minimum node length for BLAST search") + .withDefaultValue(1) + .create()); + + public final Parameter forceHashing = addParameter(new BoolParameterBuilder("forcehash") + .withDescription("force k-mer hashing (even for k <= 31)") + .withDefaultValue(false) + .create()); + + public final Parameter hashFunction = addParameter(new StringParameterBuilder("hash") + .withDescription("hash function to use: poly or fnv1a") + .withDefaultValue("poly") + .create()); + + public final Parameter maxThreads = addParameter(new IntParameterBuilder("threads") + .withDescription("how many java threads to use") + .withDefaultValue(DEFAULT_MAX_THREADS) + .create() + ); + + public final Parameter trimPaths = addParameter(new BoolParameterBuilder("trim") + .withDescription("trim all not maximal paths?") + .withDefaultValue(false) + .create() + ); + + public final Parameter percentFiltration = addParameter(new IntParameterBuilder("procfiltration") + .mandatory() + .withShortOpt("f") + .withDescription("filtration percent // [1 .. 100]") + .withDefaultValue(1) + .create()); + + public final Parameter assembler = addParameter(new StringParameterBuilder("assembler") + .mandatory() + .withDescription("assembler which you want to use") + //.withDefaultValue("None") + .create()); + + public final Parameter assemblerPath = addParameter(new StringParameterBuilder("assemblerpath") + .mandatory() + .withDescription("path of the assembler") + //.withDefaultValue("") + .create()); + + private BigLong2ShortHashMap reads; + private List sequences; + private HashFunction hasher; + + public void loadInput() throws ExecutionFailedException { + if (k.get() > 31 || forceHashing.get()) { + logger.info("Reading hashes of k-mers instead"); + this.hasher = LargeKIOUtils.hash = determineHashFunction(); + this.reads = LargeKIOUtils.loadReads(readsFiles.get(), k.get(), 0, + availableProcessors.get(), logger); + } else { + this.reads = IOUtils.loadReads(readsFiles.get(), k.get(), 0, + availableProcessors.get(), logger); + } + logger.info("Hashtable size: " + this.reads.size() + " kmers"); + try { + this.sequences = ReadersUtils.loadDnaQs(seqsFile.get()); + } catch (IOException e) { + throw new ExecutionFailedException("Could not load sequences from " + seqsFile.get().getPath()); + } + } + + + private HashFunction determineHashFunction() { + if (k.get() <= 31 && !forceHashing.get()) { + return null; + } + String name = hashFunction.get().toLowerCase(); + if (name.equals("fnv1a")) { + logger.info("Using FNV1a hash function"); + return new FNV1AHash(); + } else { + logger.info("Using default polynomial hash function"); + return new PolynomialHash(); + } + } + + private TerminationMode getTerminationMode() throws ExecutionFailedException { + TerminationMode termMode = new TerminationMode(); + if (maxKmers.get() == null && maxRadius.get() == null) { + throw new ExecutionFailedException("At least one of --maxkmers and --maxradius parameters should be set"); + } + if (maxKmers.get() != null) { + termMode.addRestriction(TerminationMode.TerminationModeType.MAX_KMERS, maxKmers.get()); + } + if (maxRadius.get() != null) { + termMode.addRestriction(TerminationMode.TerminationModeType.MAX_RADIUS, maxRadius.get()); + } + return termMode; + } + + @Override + protected void runImpl() throws ExecutionFailedException { + loadInput(); + + if (sequences.size() > 1) { + logger.info("EnvironmentAssemblerFinder works only with one input sequence!"); + return; + } + + String outputPrefix = outputDir.get().getPath() + "/"; + OneSequenceCalculator calc = new OneSequenceCalculator(sequences.get(0).toString(), k.get(), + minCoverage.get(), outputPrefix, this.hasher, reads, logger, + bothDirections.get(), chunkLength.get(), getTerminationMode(), trimPaths.get()); + calc.run(); + ExecutorService execService = Executors.newFixedThreadPool(maxThreads.get()); + for (int i = 0; i < readsFiles.get().length; i++) { + execService.execute(new ReadsFilter(readsFiles.get()[i], calc, outputPrefix, i, k.get(), + percentFiltration.get(), logger)); + } + execService.shutdown(); + try { + while (!execService.awaitTermination(100, TimeUnit.MINUTES)){} + } catch (InterruptedException e) { + e.printStackTrace(); + } + logger.info("Filtration done!"); + logger.info("Finished processing all sequences!"); + + ExecutorService execServiceAsm = Executors.newFixedThreadPool(maxThreads.get()); + for (int i = 0; i < readsFiles.get().length; i++) { + execServiceAsm.execute(new AssemblerCalculator(assembler.get(), assemblerPath.get(), outputPrefix, i, logger)); + } + execServiceAsm.shutdown(); + try { + while (!execServiceAsm.awaitTermination(100, TimeUnit.MINUTES)){} + } catch (InterruptedException e) { + e.printStackTrace(); + } + logger.info("Finished assembling all sequences!"); + + outputDir.set(new File(outputDir.get() + "/result")); + File[] f = new File[readsFiles.get().length]; + for (int i = 0; i < readsFiles.get().length; i++) { + f[i] = new File(outputPrefix + "contigs" + i + ".fasta"); + } + readsFiles.set(f); + k.set(55); + minCoverage.set(0); + + loadInput(); + outputPrefix = outputDir.get().getPath() + "/"; + + calc = new OneSequenceCalculator(sequences.get(0).toString(), k.get(), + minCoverage.get(), outputPrefix, this.hasher, reads, logger, + bothDirections.get(), chunkLength.get(), getTerminationMode(), trimPaths.get()); + calc.run(); + ExecutorService execService2 = Executors.newFixedThreadPool(maxThreads.get()); + for (int i = 0; i < readsFiles.get().length; i++) { + execService2.execute(new ReadsFilter(readsFiles.get()[i], calc, outputPrefix, i, k.get(), + percentFiltration.get(), logger)); + } + execService2.shutdown(); + logger.info("Filtration done!"); + logger.info("Finished processing all sequences!"); + } + + @Override + protected void cleanImpl() { + + } + + public EnvironmentAssemblerFinder() { + super(NAME, DESCRIPTION); + } + + public static void main(String[] args) { + new EnvironmentAssemblerFinder().mainImpl(args); + } +} +//--k 3 --reads reads.fasta --seq genes.fasta -o ress --assembler megahit --assemblerpath /home/mariya/megahit/megahit --maxkmers 1000 --coverage 0 --procfiltration 100 diff --git a/src/tools/EnvironmentFinderMain.java b/src/tools/EnvironmentFinderMain.java index 0d8d3dd..691d00a 100644 --- a/src/tools/EnvironmentFinderMain.java +++ b/src/tools/EnvironmentFinderMain.java @@ -1,6 +1,7 @@ package tools; import algo.OneSequenceCalculator; +import algo.ReadsFilter; import algo.TerminationMode; import algo.TerminationMode.TerminationModeType; import io.*; @@ -31,6 +32,7 @@ public class EnvironmentFinderMain extends Tool { .mandatory() .withShortOpt("k") .withDescription("k-mer size") + .withDefaultValue(21) .create()); public final Parameter readsFiles = addParameter(new FileMVParameterBuilder("reads") @@ -98,11 +100,19 @@ public class EnvironmentFinderMain extends Tool { .create() ); + public final Parameter percentFiltration = addParameter(new IntParameterBuilder("procfiltration") + .mandatory() + .withShortOpt("f") + .withDescription("filtration percent // [1 .. 100]") + .withDefaultValue(1) + .create()); + + private BigLong2ShortHashMap reads; private List sequences; private HashFunction hasher; - private void loadInput() throws ExecutionFailedException { + public void loadInput() throws ExecutionFailedException { if (k.get() > 31 || forceHashing.get()) { logger.info("Reading hashes of k-mers instead"); this.hasher = LargeKIOUtils.hash = determineHashFunction(); @@ -120,6 +130,7 @@ private void loadInput() throws ExecutionFailedException { } } + private HashFunction determineHashFunction() { if (k.get() <= 31 && !forceHashing.get()) { return null; @@ -136,9 +147,6 @@ private HashFunction determineHashFunction() { private TerminationMode getTerminationMode() throws ExecutionFailedException { TerminationMode termMode = new TerminationMode(); -// if (maxKmers.get() != null && maxRadius.get() != null) { -// throw new ExecutionFailedException("--maxkmers and --maxradius parameters cannot be both set"); -// } if (maxKmers.get() == null && maxRadius.get() == null) { throw new ExecutionFailedException("At least one of --maxkmers and --maxradius parameters should be set"); } @@ -155,31 +163,27 @@ private TerminationMode getTerminationMode() throws ExecutionFailedException { protected void runImpl() throws ExecutionFailedException { loadInput(); ExecutorService execService = Executors.newFixedThreadPool(maxThreads.get()); -// Thread[] calculators = new Thread[sequences.size()]; -// for (int i = 0; i < calculators.length; i++) { -// String outputPrefix = getOutputPrefix(i); //outputDir.get().getPath() + "/" + (i + 1) + "/"; -// calculators[i] = new Thread(new OneSequenceCalculator(sequences.get(i).toString(), k.get(), -// minCoverage.get(), outputPrefix, this.hasher, reads, logger, -// bothDirections.get(), maxKmers.get(), chunkLength.get())); -// calculators[i].start(); -// } -// -// try { -// for (int i = 0; i < calculators.length; i++) { -// calculators[i].join(); -// } -// } catch (InterruptedException e) { -// e.printStackTrace(); -// } - - for (int i = 0; i < sequences.size(); i++) { - String outputPrefix = getOutputPrefix(i); - execService.execute(new OneSequenceCalculator(sequences.get(i).toString(), k.get(), + if (sequences.size() == 1) { + String outputPrefix = getOutputPrefix(0); + OneSequenceCalculator calc = new OneSequenceCalculator(sequences.get(0).toString(), k.get(), minCoverage.get(), outputPrefix, this.hasher, reads, logger, - bothDirections.get(), chunkLength.get(), getTerminationMode(), trimPaths.get())); + bothDirections.get(), chunkLength.get(), getTerminationMode(), trimPaths.get()); + calc.run(); + for (int i = 0; i < readsFiles.get().length; i++) { + execService.execute(new ReadsFilter(readsFiles.get()[i], calc, outputPrefix, i, k.get(), + percentFiltration.get(), logger)); + } + logger.info("Filtration done!"); + } else { + for (int i = 0; i < sequences.size(); i++) { + String outputPrefix = getOutputPrefix(i); + execService.execute(new OneSequenceCalculator(sequences.get(i).toString(), k.get(), + minCoverage.get(), outputPrefix, this.hasher, reads, logger, + bothDirections.get(), chunkLength.get(), getTerminationMode(), trimPaths.get())); + } } - execService.shutdown(); + execService.shutdown(); logger.info("Finished processing all sequences!"); } diff --git a/src/tools/EnvironmentFinderMultiMain.java b/src/tools/EnvironmentFinderMultiMain.java index 00b5cfb..99f43e4 100644 --- a/src/tools/EnvironmentFinderMultiMain.java +++ b/src/tools/EnvironmentFinderMultiMain.java @@ -1,6 +1,6 @@ package tools; -import algo.MultiSequenceCalculator; +import algo.MultiSequenceCalculator; import io.RichFastaReader; import io.graph.DeBruijnGraphUtils; import ru.ifmo.genetics.utils.tool.ExecutionFailedException; @@ -11,6 +11,7 @@ import java.io.*; import java.util.*; + public class EnvironmentFinderMultiMain extends Tool { public static final String NAME = "environment-finder-multi"; public static final String DESCRIPTION = "Displays difference between multiple genomic environments"; @@ -64,7 +65,7 @@ private void loadInput() throws ExecutionFailedException { } } } - + try { RichFastaReader rfr = new RichFastaReader(seqFile.get()); sequence = rfr.getDnas().get(geneId.get() - 1).toString(); @@ -72,7 +73,7 @@ private void loadInput() throws ExecutionFailedException { } catch (FileNotFoundException e) { throw new ExecutionFailedException("Could not load sequence file", e); } - + } @@ -81,7 +82,11 @@ protected void runImpl() throws ExecutionFailedException { MultiSequenceCalculator calc = new MultiSequenceCalculator(sequence, k, outputDir.get().getPath(), logger, graphs); calc.run(); printGene(); + printProbability(); + logger.info("Finished processing!"); + + } private void printGene() { @@ -96,6 +101,70 @@ private void printGene() { e.printStackTrace(); } } + + private void printProbability(){ + File outputSym = new File(outputDir.get().getPath() + "/Jacard_sym.txt"); + File outputAlt = new File(outputDir.get().getPath() + "/Jacard_alt.txt"); + PrintWriter outSym; + PrintWriter outAlt; + try { + outSym = new PrintWriter(outputSym); + outAlt = new PrintWriter(outputAlt); + + outSym.println("The" + "[31mWarning! " + "symmetric <> (1 - AB/AUB):"); + outSym.println(); + outAlt.println("The" + "[31mWarning! " + "alternative <> (1 - AB/A):"); + outAlt.println(); + int[][] difference = new int[graphs.length][graphs.length]; + int[][] differenceAlt = new int[graphs.length][graphs.length]; + int[][] union = new int[graphs.length][graphs.length]; + int i = 0; + for (Map graphF: graphs){ + int j = 0; + for (Map graphS: graphs){ + for (String kmer : graphF.keySet()) { + if (graphS.containsKey(kmer) == false){ + difference[i][j] += graphF.get(kmer); + differenceAlt[i][j] += graphF.get(kmer); + union[i][j] += graphF.get(kmer); + } + else{ + difference[i][j] += Math.abs(graphF.get(kmer) - graphS.get(kmer)); + differenceAlt[i][j] += Math.abs(graphF.get(kmer) - graphS.get(kmer)); + union[i][j] += Math.max(graphF.get(kmer), graphS.get(kmer)); + } + + } + for (String kmer : graphS.keySet()) { + if (graphF.containsKey(kmer) == false){ + difference[i][j] += graphS.get(kmer); + union[i][j] += graphS.get(kmer); + } + } + j++; + } + i++; + } + + for (i = 0; i < graphs.length; i++){ + outSym.print(envFiles.get()[i]); + outAlt.print(envFiles.get()[i]); + for (int j = 0; j < graphs.length; j++){ + int intersection = union[i][j] - difference[i][j]; + outSym.printf("%6.2f ", 1 - intersection/(float)union[i][j]); + outAlt.printf("%6.2f ", 1 - intersection/(float)(union[i][j] - differenceAlt[i][j])); + } + outSym.println(); + outAlt.println(); + } + + outSym.close(); + outAlt.close(); + + } catch (FileNotFoundException e) { + e.printStackTrace(); + } + } @Override