diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVcfSampleCompare.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVcfSampleCompare.scala deleted file mode 100644 index c3f5add4b52092e607e1c4975745e7f58793dd08..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVcfSampleCompare.scala +++ /dev/null @@ -1,87 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.pipelines.gatk - -import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } -import java.io.File -import nl.lumc.sasc.biopet.core.config.Configurable -import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants -import nl.lumc.sasc.biopet.extensions.gatk.SelectVariants -import nl.lumc.sasc.biopet.extensions.gatk.VariantEval -import org.broadinstitute.gatk.queue.QScript -import org.broadinstitute.gatk.utils.commandline.{ Input, Argument } - -class GatkVcfSampleCompare(val root: Configurable) extends QScript with BiopetQScript { - def this() = this(null) - - @Input(doc = "Sample vcf file(s)", shortName = "V") - var vcfFiles: List[File] = _ - - @Argument(doc = "Reference", shortName = "R", required = false) - var reference: File = config("reference") - - @Argument(doc = "Target bed", shortName = "targetBed", required = false) - var targetBed: List[File] = Nil - - @Argument(doc = "Samples", shortName = "sample", required = false) - var samples: List[String] = Nil - - var vcfFile: File = _ - var sampleVcfs: Map[String, File] = Map() - def generalSampleDir = outputDir + "samples/" - - def init() { - if (config.contains("target_bed")) - for (bed <- config("target_bed").asList) - targetBed :+= bed.toString - if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") - else if (!outputDir.endsWith("/")) outputDir += "/" - } - - def biopetScript() { - vcfFile = if (vcfFiles.size > 1) { - val combineVariants = CombineVariants(this, vcfFiles, outputDir + "merge.vcf") - add(combineVariants) - combineVariants.out - } else vcfFiles.head - - for (sample <- samples) { - sampleVcfs += (sample -> new File(generalSampleDir + sample + File.separator + sample + ".vcf")) - val selectVariants = SelectVariants(this, vcfFile, sampleVcfs(sample)) - selectVariants.sample_name = Seq(sample) - selectVariants.excludeNonVariants = true - add(selectVariants) - } - - val sampleCompareMetrics = new SampleCompareMetrics(this) - sampleCompareMetrics.samples = samples - sampleCompareMetrics.sampleDir = generalSampleDir - sampleCompareMetrics.snpRelFile = outputDir + "compare.snp.rel.tsv" - sampleCompareMetrics.snpAbsFile = outputDir + "compare.snp.abs.tsv" - sampleCompareMetrics.indelRelFile = outputDir + "compare.indel.rel.tsv" - sampleCompareMetrics.indelAbsFile = outputDir + "compare.indel.abs.tsv" - sampleCompareMetrics.totalFile = outputDir + "total.tsv" - - for ((sample, sampleVcf) <- sampleVcfs) { - val sampleDir = generalSampleDir + sample + File.separator - for ((compareSample, compareSampleVcf) <- sampleVcfs) { - val variantEval = VariantEval(this, - sampleVcf, - compareSampleVcf, - new File(sampleDir + sample + "-" + compareSample + ".eval.txt"), - Seq("VariantType", "CompRod"), - Seq("CompOverlap") - ) - if (targetBed != null) variantEval.L = targetBed - add(variantEval) - sampleCompareMetrics.deps ::= variantEval.out - } - } - add(sampleCompareMetrics) - } -} - -object GatkVcfSampleCompare extends PipelineCommand diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/SampleCompareMetrics.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/SampleCompareMetrics.scala deleted file mode 100644 index 861455fe4d886219813409023d022ea096f413c9..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/SampleCompareMetrics.scala +++ /dev/null @@ -1,153 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.pipelines.gatk - -import java.io.File -import java.io.PrintWriter -import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction -import nl.lumc.sasc.biopet.core.config.Configurable -import org.broadinstitute.gatk.utils.R.RScriptExecutor -import org.broadinstitute.gatk.utils.commandline.{ Output, Argument } -import scala.io.Source -import org.broadinstitute.gatk.utils.R.{ RScriptLibrary, RScriptExecutor } -import org.broadinstitute.gatk.utils.io.Resource -import scala.collection.mutable.Map -import scala.math._ - -class SampleCompareMetrics(val root: Configurable) extends BiopetJavaCommandLineFunction { - javaMainClass = getClass.getName - - @Argument(doc = "Sample Dir", shortName = "sampleDir", required = true) - var sampleDir: String = _ - - @Argument(doc = "Samples", shortName = "sample", required = true) - var samples: List[String] = Nil - - @Argument(doc = "File sufix", shortName = "sufix", required = false) - var fileSufix: String = _ - - @Output(doc = "snpRelFile", shortName = "snpRelFile", required = true) - var snpRelFile: File = _ - - @Output(doc = "snpAbsFile", shortName = "snpAbsFile", required = true) - var snpAbsFile: File = _ - - @Output(doc = "indelRelFile", shortName = "indelRelFile", required = true) - var indelRelFile: File = _ - - @Output(doc = "indelAbsFile", shortName = "indelAbsFile", required = true) - var indelAbsFile: File = _ - - @Output(doc = "totalFile", shortName = "totalFile", required = true) - var totalFile: File = _ - - override val defaultVmem = "8G" - memoryLimit = Option(4.0) - - override def commandLine = super.commandLine + - required("-sampleDir", sampleDir) + - repeat("-sample", samples) + - optional("-fileSufix", fileSufix) + - required("-snpRelFile", snpRelFile) + - required("-snpAbsFile", snpAbsFile) + - required("-indelRelFile", indelRelFile) + - required("-indelAbsFile", indelAbsFile) + - required("-totalFile", totalFile) -} - -object SampleCompareMetrics { - var sampleDir: String = _ - var samples: List[String] = Nil - var fileSufix: String = ".eval.txt" - var snpRelFile: File = _ - var snpAbsFile: File = _ - var indelRelFile: File = _ - var indelAbsFile: File = _ - var totalFile: File = _ - /** - * @param args the command line arguments - */ - def main(args: Array[String]): Unit = { - - for (t <- 0 until args.size) { - args(t) match { - case "-sample" => samples +:= args(t + 1) - case "-sampleDir" => sampleDir = args(t + 1) - case "-fileSufix" => fileSufix = args(t + 1) - case "-snpRelFile" => snpRelFile = new File(args(t + 1)) - case "-snpAbsFile" => snpAbsFile = new File(args(t + 1)) - case "-indelRelFile" => indelRelFile = new File(args(t + 1)) - case "-indelAbsFile" => indelAbsFile = new File(args(t + 1)) - case "-totalFile" => totalFile = new File(args(t + 1)) - case _ => - } - } - if (sampleDir == null) throw new IllegalStateException("No sampleDir, use -sampleDir") - else if (!sampleDir.endsWith("/")) sampleDir += "/" - - val regex = """\W+""".r - val snpsOverlap: Map[(String, String), Int] = Map() - val indelsOverlap: Map[(String, String), Int] = Map() - val snpsTotal: Map[String, Int] = Map() - val indelsTotal: Map[String, Int] = Map() - for (sample1 <- samples; sample2 <- samples) { - val reader = Source.fromFile(new File(sampleDir + sample1 + "/" + sample1 + "-" + sample2 + fileSufix)) - for (line <- reader.getLines) { - regex.split(line) match { - case Array(_, _, _, varType, all, novel, overlap, rate, _*) => { - varType match { - case "SNP" => { - snpsOverlap += (sample1, sample2) -> overlap.toInt - snpsTotal += sample1 -> all.toInt - } - case "INDEL" => { - indelsOverlap += (sample1, sample2) -> overlap.toInt - indelsTotal += sample1 -> all.toInt - } - case _ => - } - } - case _ => - } - } - reader.close() - } - - val snpRelWritter = new PrintWriter(snpRelFile) - val snpAbsWritter = new PrintWriter(snpAbsFile) - val indelRelWritter = new PrintWriter(indelRelFile) - val indelAbsWritter = new PrintWriter(indelAbsFile) - - val allWritters = List(snpRelWritter, snpAbsWritter, indelRelWritter, indelAbsWritter) - for (writter <- allWritters) writter.println(samples.mkString("\t", "\t", "")) - for (sample1 <- samples) { - for (writter <- allWritters) writter.print(sample1) - for (sample2 <- samples) { - snpRelWritter.print("\t" + (round((snpsOverlap(sample1, sample2).toDouble / snpsTotal(sample1) * 10000.0)) / 10000.0)) - snpAbsWritter.print("\t" + snpsOverlap(sample1, sample2)) - indelRelWritter.print("\t" + (round((indelsOverlap(sample1, sample2).toDouble / indelsTotal(sample1) * 10000.0)) / 10000.0)) - indelAbsWritter.print("\t" + indelsOverlap(sample1, sample2)) - } - for (writter <- allWritters) writter.println() - } - for (writter <- allWritters) writter.close() - - val totalWritter = new PrintWriter(totalFile) - totalWritter.println("Sample\tSNPs\tIndels") - for (sample <- samples) - totalWritter.println(sample + "\t" + snpsTotal(sample) + "\t" + indelsTotal(sample)) - totalWritter.close() - - def plot(file: File) { - val executor = new RScriptExecutor - executor.addScript(new Resource("plotHeatmap.R", getClass)) - executor.addArgs(file, file.getAbsolutePath.stripSuffix(".tsv") + ".png", file.getAbsolutePath.stripSuffix(".tsv") + ".clustering.png") - executor.exec() - } - plot(snpRelFile) - plot(indelRelFile) - } -} \ No newline at end of file diff --git a/protected/biopet-protected-package/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutableProtected.scala b/protected/biopet-protected-package/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutableProtected.scala index 9457fd36cfab986c5fdc9d5cbc29836ced4e0962..902e292fca017947affb287249ccff764e943100 100644 --- a/protected/biopet-protected-package/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutableProtected.scala +++ b/protected/biopet-protected-package/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutableProtected.scala @@ -12,7 +12,6 @@ object BiopetExecutableProtected extends BiopetExecutable { nl.lumc.sasc.biopet.pipelines.gatk.GatkVariantcalling, nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline, nl.lumc.sasc.biopet.pipelines.gatk.GatkVariantRecalibration, - nl.lumc.sasc.biopet.pipelines.gatk.GatkVcfSampleCompare, nl.lumc.sasc.biopet.pipelines.basty.Basty) def tools = BiopetExecutablePublic.tools diff --git a/public/biopet-framework/src/main/resources/nl/lumc/sasc/biopet/pipelines/gatk/plotHeatmap.R b/public/biopet-framework/src/main/resources/nl/lumc/sasc/biopet/pipelines/gatk/plotHeatmap.R deleted file mode 100644 index 4158db708d58c8cc19b535dcfe871c626fa51ad6..0000000000000000000000000000000000000000 --- a/public/biopet-framework/src/main/resources/nl/lumc/sasc/biopet/pipelines/gatk/plotHeatmap.R +++ /dev/null @@ -1,24 +0,0 @@ -library('gplots') - -args <- commandArgs(TRUE) -inputArg <- args[1] -outputArg <- args[2] -outputArgClustering <- args[3] - -col <- heat.colors(250) -col[250] <- "#00FF00" - -heat<-read.table(inputArg, header = 1, sep= '\t') -rownames(heat) <- heat[,1] -heat<- heat[,-1] - -heat<- as.matrix(heat) - -png(file = outputArg, width = 1000, height = 1000) -heatmap.2(heat, trace = 'none', col = col, Colv=NA, Rowv=NA, dendrogram="none") -dev.off() - - -png(file = outputArgClustering, width = 1000, height = 1000) -heatmap.2(heat, trace = 'none', col = col, Colv="Rowv", dendrogram="row") -dev.off()