Skip to content
Snippets Groups Projects
Commit 2e19d016 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Add more functionality

parent 0636ae0a
No related branches found
No related tags found
No related merge requests found
...@@ -2,6 +2,7 @@ package nl.lumc.sasc.biopet.tools ...@@ -2,6 +2,7 @@ package nl.lumc.sasc.biopet.tools
import java.io.{ PrintWriter, File } import java.io.{ PrintWriter, File }
import htsjdk.variant.variantcontext.Genotype
import htsjdk.variant.vcf.VCFFileReader import htsjdk.variant.vcf.VCFFileReader
import nl.lumc.sasc.biopet.core.ToolCommand import nl.lumc.sasc.biopet.core.ToolCommand
import org.broadinstitute.gatk.utils.R.RScriptExecutor import org.broadinstitute.gatk.utils.R.RScriptExecutor
...@@ -28,6 +29,11 @@ object VcfStats extends ToolCommand { ...@@ -28,6 +29,11 @@ object VcfStats extends ToolCommand {
} }
} }
val genotypeOverlap: mutable.Map[String, mutable.Map[String, Int]] = mutable.Map()
val variantOverlap: mutable.Map[String, mutable.Map[String, Int]] = mutable.Map()
val genotypeStats: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]] = mutable.Map()
/** /**
* @param args the command line arguments * @param args the command line arguments
*/ */
...@@ -40,29 +46,90 @@ object VcfStats extends ToolCommand { ...@@ -40,29 +46,90 @@ object VcfStats extends ToolCommand {
val header = reader.getFileHeader val header = reader.getFileHeader
val samples = header.getSampleNamesInOrder.toList val samples = header.getSampleNamesInOrder.toList
val genotypeOverlap: mutable.Map[String, mutable.Map[String, Int]] = mutable.Map() // Init
for (sample1 <- samples) {
genotypeOverlap(sample1) = mutable.Map()
variantOverlap(sample1) = mutable.Map()
genotypeStats(sample1) = mutable.Map()
for (sample2 <- samples) {
genotypeOverlap(sample1)(sample2) = 0
variantOverlap(sample1)(sample2) = 0
}
}
// Reading vcf records
logger.info("Start reading vcf records")
for (record <- reader) { for (record <- reader) {
for (sample1 <- samples) { for (sample1 <- samples) {
val genotype = record.getGenotype(sample1)
checkGenotype(genotype)
for (sample2 <- samples) { for (sample2 <- samples) {
if (record.getGenotype(sample1).getAlleles == record.getGenotype(sample2).getAlleles) { if (genotype.getAlleles == record.getGenotype(sample2).getAlleles) {
if (!genotypeOverlap.contains(sample1)) genotypeOverlap(sample1) = mutable.Map() genotypeOverlap(sample1)(sample2) = genotypeOverlap(sample1)(sample2) + 1
val current = genotypeOverlap(sample1).getOrElse(sample2, 0) if (!(genotype.isHomRef || genotype.isNoCall || genotype.isNonInformative))
genotypeOverlap(sample1)(sample2) = current + 1 variantOverlap(sample1)(sample2) = variantOverlap(sample1)(sample2) + 1
} }
} }
} }
} }
logger.info("Done reading vcf records")
writeOverlap(genotypeOverlap, commandArgs.outputDir + "/sample_genotype_overlap", samples) writeGenotypeFields(commandArgs.outputDir + "/genotype_", samples)
writeOverlap(genotypeOverlap, commandArgs.outputDir + "/sample_compare/genotype_overlap", samples)
plot(new File(commandArgs.outputDir + "/sample_genotype_overlap.rel.tsv")) writeOverlap(variantOverlap, commandArgs.outputDir + "/sample_compare/variant_overlap", samples)
logger.info("Done") logger.info("Done")
} }
def checkGenotype(genotype: Genotype): Unit = {
val sample = genotype.getSampleName
val dp = if (genotype.hasDP) genotype.getDP else "not set"
if (!genotypeStats(sample).contains("DP")) genotypeStats(sample)("DP") = mutable.Map()
genotypeStats(sample)("DP")(dp) = genotypeStats(sample)("DP").getOrElse(dp, 0) + 1
val gq = if (genotype.hasGQ) genotype.getGQ else "not set"
if (!genotypeStats(sample).contains("GQ")) genotypeStats(sample)("GQ") = mutable.Map()
genotypeStats(sample)("GQ")(gq) = genotypeStats(sample)("DP").getOrElse(gq, 0) + 1
//TODO: add AD field
}
def writeGenotypeFields(prefix: String, samples: List[String]) {
val fields = List("DP", "GQ")
for (field <- fields) {
val file = new File(prefix + field + ".tsv")
file.getParentFile.mkdirs()
val writer = new PrintWriter(file)
writer.println(samples.mkString("\t", "\t", ""))
val keySet = (for (sample <- samples) yield genotypeStats(sample)(field).keySet).fold(Set[Any]())(_ ++ _)
for (key <- keySet.toList.sortWith(sortAnyAny(_, _))) {
val values = for (sample <- samples) yield genotypeStats(sample)(field).getOrElse(key, 0)
writer.println(values.mkString(key + "\t", "\t", ""))
}
writer.close()
}
}
def sortAnyAny(a: Any, b: Any): Boolean = {
a match {
case ai: Int => {
b match {
case bi: Int => ai < bi
case _ => a.toString < b.toString
}
}
case _ => a.toString < b.toString
}
}
def writeOverlap(overlap: mutable.Map[String, mutable.Map[String, Int]], prefix: String, samples: List[String]): Unit = { def writeOverlap(overlap: mutable.Map[String, mutable.Map[String, Int]], prefix: String, samples: List[String]): Unit = {
val absWriter = new PrintWriter(new File(prefix + ".abs.tsv")) val absFile = new File(prefix + ".abs.tsv")
val relWriter = new PrintWriter(new File(prefix + ".rel.tsv")) val relFile = new File(prefix + ".rel.tsv")
absFile.getParentFile.mkdirs()
val absWriter = new PrintWriter(absFile)
val relWriter = new PrintWriter(relFile)
absWriter.println(samples.mkString("\t", "\t", "")) absWriter.println(samples.mkString("\t", "\t", ""))
relWriter.println(samples.mkString("\t", "\t", "")) relWriter.println(samples.mkString("\t", "\t", ""))
...@@ -75,9 +142,11 @@ object VcfStats extends ToolCommand { ...@@ -75,9 +142,11 @@ object VcfStats extends ToolCommand {
} }
absWriter.close() absWriter.close()
relWriter.close() relWriter.close()
plotHeatmap(relFile)
} }
def plot(file: File) { def plotHeatmap(file: File) {
val executor = new RScriptExecutor val executor = new RScriptExecutor
executor.addScript(new Resource("plotHeatmap.R", getClass)) executor.addScript(new Resource("plotHeatmap.R", getClass))
executor.addArgs(file, file.getAbsolutePath.stripSuffix(".tsv") + ".png", file.getAbsolutePath.stripSuffix(".tsv") + ".clustering.png") executor.addArgs(file, file.getAbsolutePath.stripSuffix(".tsv") + ".png", file.getAbsolutePath.stripSuffix(".tsv") + ".clustering.png")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment