Commit 0636ae0a authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

First version of VcfStats tool for #93

parent 06536548
library('gplots')
args <- commandArgs(TRUE)
inputArg <- args[1]
outputArg <- args[2]
outputArgClustering <- args[3]
col <- heat.colors(250)
col[250] <- "#00FF00"
heat<-read.table(inputArg, header = 1, sep= '\t')
rownames(heat) <- heat[,1]
heat<- heat[,-1]
heat<- as.matrix(heat)
png(file = outputArg, width = 1000, height = 1000)
heatmap.2(heat, trace = 'none', col = col, Colv=NA, Rowv=NA, dendrogram="none")
dev.off()
png(file = outputArgClustering, width = 1000, height = 1000)
heatmap.2(heat, trace = 'none', col = col, Colv="Rowv", dendrogram="row")
dev.off()
package nl.lumc.sasc.biopet.tools
import java.io.{ PrintWriter, File }
import htsjdk.variant.vcf.VCFFileReader
import nl.lumc.sasc.biopet.core.ToolCommand
import org.broadinstitute.gatk.utils.R.RScriptExecutor
import org.broadinstitute.gatk.utils.io.Resource
import scala.collection.JavaConversions._
import scala.collection.mutable
/**
* Created by pjvan_thof on 1/10/15.
*/
class VcfStats {
//TODO: add Queue wrapper
}
object VcfStats extends ToolCommand {
case class Args(inputFile: File = null, outputDir: String = null) extends AbstractArgs
class OptParser extends AbstractOptParser {
opt[File]('I', "inputFile") required () unbounded () valueName ("<file>") action { (x, c) =>
c.copy(inputFile = x)
}
opt[String]('o', "outputDir") required () unbounded () valueName ("<file>") action { (x, c) =>
c.copy(outputDir = x)
}
}
/**
* @param args the command line arguments
*/
def main(args: Array[String]): Unit = {
logger.info("Started")
val argsParser = new OptParser
val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)
val reader = new VCFFileReader(commandArgs.inputFile, false)
val header = reader.getFileHeader
val samples = header.getSampleNamesInOrder.toList
val genotypeOverlap: mutable.Map[String, mutable.Map[String, Int]] = mutable.Map()
for (record <- reader) {
for (sample1 <- samples) {
for (sample2 <- samples) {
if (record.getGenotype(sample1).getAlleles == record.getGenotype(sample2).getAlleles) {
if (!genotypeOverlap.contains(sample1)) genotypeOverlap(sample1) = mutable.Map()
val current = genotypeOverlap(sample1).getOrElse(sample2, 0)
genotypeOverlap(sample1)(sample2) = current + 1
}
}
}
}
writeOverlap(genotypeOverlap, commandArgs.outputDir + "/sample_genotype_overlap", samples)
plot(new File(commandArgs.outputDir + "/sample_genotype_overlap.rel.tsv"))
logger.info("Done")
}
def writeOverlap(overlap: mutable.Map[String, mutable.Map[String, Int]], prefix: String, samples: List[String]): Unit = {
val absWriter = new PrintWriter(new File(prefix + ".abs.tsv"))
val relWriter = new PrintWriter(new File(prefix + ".rel.tsv"))
absWriter.println(samples.mkString("\t", "\t", ""))
relWriter.println(samples.mkString("\t", "\t", ""))
for (sample1 <- samples) {
val values = for (sample2 <- samples) yield overlap.getOrElse(sample1, mutable.Map()).getOrElse(sample2, 0)
absWriter.println(values.mkString(sample1 + "\t", "\t", ""))
val total = overlap.getOrElse(sample1, mutable.Map()).getOrElse(sample1, 0)
relWriter.println(values.map(_.toFloat / total).mkString(sample1 + "\t", "\t", ""))
}
absWriter.close()
relWriter.close()
}
def plot(file: File) {
val executor = new RScriptExecutor
executor.addScript(new Resource("plotHeatmap.R", getClass))
executor.addArgs(file, file.getAbsolutePath.stripSuffix(".tsv") + ".png", file.getAbsolutePath.stripSuffix(".tsv") + ".clustering.png")
executor.exec()
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment