diff --git a/docs/tools/SamplesTsvToJson.md b/docs/tools/SamplesTsvToJson.md index dc17f44568eb4e5252ec5aaa5a42cb90d6746294..84a33413e9d98bd110d02952aaa46a4891cf59e1 100644 --- a/docs/tools/SamplesTsvToJson.md +++ b/docs/tools/SamplesTsvToJson.md @@ -58,7 +58,7 @@ To get the above example out of the tool one should provide 2 TSV files as follo ---- -| samples | library | bam | +| sample | library | bam | | ------- | ------- | --------- | |Sample_ID_1 |Lib_ID_1 |MyFirst.bam | |Sample_ID_2 |Lib_ID_2 |MySecond.bam | diff --git a/public/biopet-framework/pom.xml b/public/biopet-framework/pom.xml index cc520eb6c3de5126905c59696164fd98b712cd7b..2517acae3020b2542f7e77f66f9edd522c11c32a 100644 --- a/public/biopet-framework/pom.xml +++ b/public/biopet-framework/pom.xml @@ -61,7 +61,7 @@ <dependency> <groupId>org.scala-lang</groupId> <artifactId>scala-library</artifactId> - <version>2.10.2</version> + <version>2.10.4</version> </dependency> <dependency> <groupId>org.broadinstitute.gatk</groupId> diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala index 1d236b57c49b3cf436380605f35af9e780670052..8eacd72b9b248b3c7f182eeeacf53b1bd084adf5 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala @@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.tools import java.io.{ File, PrintWriter } -import htsjdk.samtools.SamReaderFactory +import htsjdk.samtools.{ SAMSequenceRecord, SamReaderFactory } import htsjdk.samtools.reference.IndexedFastaSequenceFile import htsjdk.variant.variantcontext.VariantContext import htsjdk.variant.vcf.VCFFileReader @@ -28,6 +28,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import scala.collection.JavaConversions._ import scala.collection.mutable.ListBuffer 
+import scala.collection.parallel.ParMap class BastyGenerateFasta(val root: Configurable) extends ToolCommandFuntion with Reference { javaMainClass = getClass.getName @@ -155,7 +156,7 @@ object BastyGenerateFasta extends ToolCommand { } } - protected var cmdArgs: Args = _ + protected implicit var cmdArgs: Args = _ private val chunkSize = 100000 /** @@ -165,11 +166,18 @@ object BastyGenerateFasta extends ToolCommand { val argsParser = new OptParser cmdArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1) - if (cmdArgs.outputVariants != null) writeVariantsOnly() - if (cmdArgs.outputConsensus != null || cmdArgs.outputConsensusVariants != null) writeConsensus() + if (cmdArgs.outputVariants != null) { + writeVariantsOnly() + } + if (cmdArgs.outputConsensus != null || cmdArgs.outputConsensusVariants != null) { + writeConsensus() + } + + //FIXME: what to do if outputcConsensus is set, but not outputConsensusVariants (and vice versa)? } protected def writeConsensus() { + //FIXME: preferably split this up in functions, so that they can be unit tested val referenceFile = new IndexedFastaSequenceFile(cmdArgs.reference) val referenceDict = referenceFile.getSequenceDictionary @@ -253,7 +261,7 @@ object BastyGenerateFasta extends ToolCommand { } } - protected def writeVariantsOnly() { + protected[tools] def writeVariantsOnly() { val writer = new PrintWriter(cmdArgs.outputVariants) writer.println(">" + cmdArgs.outputName) val vcfReader = new VCFFileReader(cmdArgs.inputVcf, false) @@ -265,17 +273,34 @@ object BastyGenerateFasta extends ToolCommand { vcfReader.close() } - protected def getMaxAllele(vcfRecord: VariantContext): String = { + // TODO: what does this do? 
+ // Seems to me it finds the allele in a sample with the highest AD value + // if this allele is shorter than the largest allele, it will append '-' to the string + protected[tools] def getMaxAllele(vcfRecord: VariantContext)(implicit cmdArgs: Args): String = { val maxSize = getLongestAllele(vcfRecord).getBases.length - if (cmdArgs.sampleName == null) return fillAllele(vcfRecord.getReference.getBaseString, maxSize) + if (cmdArgs.sampleName == null) { + return fillAllele(vcfRecord.getReference.getBaseString, maxSize) + } val genotype = vcfRecord.getGenotype(cmdArgs.sampleName) - if (genotype == null) return fillAllele("", maxSize) + + if (genotype == null) { + return fillAllele("", maxSize) + } + val AD = if (genotype.hasAD) genotype.getAD else Array.fill(vcfRecord.getAlleles.size())(cmdArgs.minAD) - if (AD == null) return fillAllele("", maxSize) + + if (AD == null) { + return fillAllele("", maxSize) + } + val maxADid = AD.zipWithIndex.maxBy(_._1)._2 - if (AD(maxADid) < cmdArgs.minAD) return fillAllele("", maxSize) + + if (AD(maxADid) < cmdArgs.minAD) { + return fillAllele("", maxSize) + } + fillAllele(vcfRecord.getAlleles()(maxADid).getBaseString, maxSize) } -} \ No newline at end of file +} diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala index 68aedb1deebcc1ddb44c44c418d724325b14c166..3d2ba4affc090554b5c47dd65bea9466f7720753 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala @@ -61,12 +61,18 @@ object BiopetFlagstat extends ToolCommand { flagstat } - case class Args(inputFile: File = null, summaryFile: Option[File] = None, region: Option[String] = None) extends AbstractArgs + case class Args(inputFile: File = null, + outputFile: Option[File] = None, + summaryFile: Option[File] = None, + 
region: Option[String] = None) extends AbstractArgs class OptParser extends AbstractOptParser { opt[File]('I', "inputFile") required () valueName "<file>" action { (x, c) => c.copy(inputFile = x) } text "input bam file" + opt[File]('o', "outputFile") valueName "<file>" action { (x, c) => + c.copy(outputFile = Some(x)) + } text "output file" opt[File]('s', "summaryFile") valueName "<file>" action { (x, c) => c.copy(summaryFile = Some(x)) } text "summary output file" @@ -151,7 +157,14 @@ object BiopetFlagstat extends ToolCommand { writer.close() } - println(flagstatCollector.report) + commandArgs.outputFile match { + case Some(file) => { + val writer = new PrintWriter(file) + writer.println(flagstatCollector.report) + writer.close() + } + case _ => println(flagstatCollector.report) + } } class FlagstatCollector { diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala index 2797ffc2de25d6045f7717947149cd3900437ced..919316c2ba04a3c69005a7fa1a1a5464d3629c3a 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala @@ -116,7 +116,7 @@ object CheckAllelesVcfInBam extends ToolCommand { } val counts = for (samRecord <- bamIter if !filterRead(samRecord)) { - checkAlles(samRecord, vcfRecord) match { + checkAlleles(samRecord, vcfRecord) match { case Some(a) => if (countReports(sample).aCounts.contains(a)) countReports(sample).aCounts(a) += 1 else countReports(sample).aCounts += (a -> 1) case _ => countReports(sample).notFound += 1 @@ -142,7 +142,7 @@ object CheckAllelesVcfInBam extends ToolCommand { writer.close() } - def checkAlles(samRecord: SAMRecord, vcfRecord: VariantContext): Option[String] = { + def checkAlleles(samRecord: SAMRecord, vcfRecord: VariantContext): Option[String] = { val 
readStartPos = List.range(0, samRecord.getReadBases.length) .find(x => samRecord.getReferencePositionAtReadPosition(x + 1) == vcfRecord.getStart) getOrElse { return None } val readBases = samRecord.getReadBases diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala index 5105c07fb509427efa1a1747aba00bd78318e14a..3f4c3a4e7d392557c71203e2e143250c807e4a81 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala @@ -55,10 +55,10 @@ object FastqSplitter extends ToolCommand { class OptParser extends AbstractOptParser { opt[File]('I', "inputFile") required () valueName "<file>" action { (x, c) => c.copy(inputFile = x) - } text "out is a required file property" + } text "Path to input file" opt[File]('o', "output") required () unbounded () valueName "<file>" action { (x, c) => c.copy(outputFile = x :: c.outputFile) - } text "out is a required file property" + } text "Path to output file" } /** diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala index dafa21f20d18ce0c0e364e17778b3547f0d2ed67..b6666fcf1013fffc9ec08af7aa85b84c0c187de4 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala @@ -15,7 +15,7 @@ */ package nl.lumc.sasc.biopet.tools -import java.io.File +import java.io.{ PrintWriter, File } import htsjdk.samtools.{ QueryInterval, SAMRecord, SamReaderFactory, ValidationStringency } import nl.lumc.sasc.biopet.core.ToolCommand @@ -24,15 +24,20 @@ import scala.collection.JavaConversions._ import scala.io.Source object FindRepeatsPacBio 
extends ToolCommand { - case class Args(inputBam: File = null, inputBed: File = null) extends AbstractArgs + case class Args(inputBam: File = null, + outputFile: Option[File] = None, + inputBed: File = null) extends AbstractArgs class OptParser extends AbstractOptParser { opt[File]('I', "inputBam") required () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(inputBam = x) - } + } text "Path to input file" + opt[File]('o', "outputFile") maxOccurs 1 valueName "<file>" action { (x, c) => + c.copy(outputFile = Some(x)) + } text "Path to input file" opt[File]('b', "inputBed") required () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(inputBed = x) - } text "output file, default to stdout" + } text "Path to bed file" } /** @@ -50,7 +55,6 @@ object FindRepeatsPacBio extends ToolCommand { val header = List("chr", "startPos", "stopPos", "Repeat_seq", "repeatLength", "original_Repeat_readLength", "Calculated_repeat_readLength", "minLength", "maxLength", "inserts", "deletions", "notSpan") - println(header.mkString("\t")) for ( bedLine <- Source.fromFile(commandArgs.inputBed).getLines(); @@ -84,9 +88,21 @@ object FindRepeatsPacBio extends ToolCommand { if (length < minLength || minLength == -1) minLength = length } } - println(List(chr, startPos, stopPos, typeRepeat, repeatLength, oriRepeatLength, calcRepeatLength.mkString(","), minLength, - maxLength, inserts.mkString("/"), deletions.mkString("/"), notSpan).mkString("\t")) bamIter.close() + commandArgs.outputFile match { + case Some(file) => { + val writer = new PrintWriter(file) + writer.println(header.mkString("\t")) + writer.println(List(chr, startPos, stopPos, typeRepeat, repeatLength, oriRepeatLength, calcRepeatLength.mkString(","), minLength, + maxLength, inserts.mkString("/"), deletions.mkString("/"), notSpan).mkString("\t")) + writer.close() + } + case _ => { + println(header.mkString("\t")) + println(List(chr, startPos, stopPos, typeRepeat, repeatLength, oriRepeatLength, 
calcRepeatLength.mkString(","), minLength, + maxLength, inserts.mkString("/"), deletions.mkString("/"), notSpan).mkString("\t")) + } + } } } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCountFastq.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCountFastq.scala index c0e29aa2155ffdfd4a0a609001b432810de388b9..e1673ff44fbf3bf4d77b80799f0d9fd01446920f 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCountFastq.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCountFastq.scala @@ -73,7 +73,7 @@ object SageCountFastq extends ToolCommand { if (counts.contains(seq)) counts(seq) += 1 else counts += (seq -> 1) count += 1 - if (count % 1000000 == 0) System.err.println(count + " sequences done") + if (count % 1000000 == 0) logger.info(count + " sequences done") } }) logger.info(count + " sequences done") diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateLibrary.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateLibrary.scala index 9ac8d8f2c0609f48a0efc96c24d7fa7b5b71fa15..c26604821b4df85fadde6c284c8c78f4a9456ca6 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateLibrary.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateLibrary.scala @@ -77,10 +77,10 @@ object SageCreateLibrary extends ToolCommand { opt[Int]("length") required () unbounded () action { (x, c) => c.copy(length = x) } - opt[File]("noTagsOutput") required () unbounded () valueName "<file>" action { (x, c) => + opt[File]("noTagsOutput") unbounded () valueName "<file>" action { (x, c) => c.copy(noTagsOutput = x) } - opt[File]("noAntiTagsOutput") required () unbounded () valueName "<file>" action { (x, c) => + opt[File]("noAntiTagsOutput") unbounded () valueName "<file>" action { (x, c) => c.copy(noAntiTagsOutput = x) } opt[File]("allGenesOutput") 
unbounded () valueName "<file>" action { (x, c) => @@ -88,8 +88,7 @@ object SageCreateLibrary extends ToolCommand { } } - var tagRegex: Regex = null - var geneRegex = """ENSG[0-9]{11}""".r + val geneRegex = """ENSG[0-9]{11}""".r val tagGenesMap: mutable.Map[String, TagGenes] = mutable.Map() @@ -114,23 +113,24 @@ object SageCreateLibrary extends ToolCommand { if (!commandArgs.input.exists) throw new IllegalStateException("Input file not found, file: " + commandArgs.input) - tagRegex = (commandArgs.tag + "[CATG]{" + commandArgs.length + "}").r + val tagRegex = (commandArgs.tag + "[CATG]{" + commandArgs.length + "}").r var count = 0 - System.err.println("Reading fasta file") + logger.info("Reading fasta file") val reader = FastaReaderHelper.readFastaDNASequence(commandArgs.input) - System.err.println("Finding tags") + logger.info("Finding tags") for ((name, seq) <- reader) { - getTags(name, seq) + val result = getTags(name, seq, tagRegex) + addTagresultToTaglib(name, result) count += 1 - if (count % 10000 == 0) System.err.println(count + " transcripts done") + if (count % 10000 == 0) logger.info(count + " transcripts done") } - System.err.println(count + " transcripts done") + logger.info(count + " transcripts done") - System.err.println("Start sorting tags") + logger.info("Start sorting tags") val tagGenesMapSorted: SortedMap[String, TagGenes] = SortedMap(tagGenesMap.toArray: _*) - System.err.println("Writting output files") + logger.info("Writting output files") val writer = new PrintWriter(commandArgs.output) writer.println("#tag\tfirstTag\tAllTags\tFirstAntiTag\tAllAntiTags") for ((tag, genes) <- tagGenesMapSorted) { @@ -167,7 +167,7 @@ object SageCreateLibrary extends ToolCommand { } } - def addTagresultToTaglib(name: String, tagResult: TagResult) { + private def addTagresultToTaglib(name: String, tagResult: TagResult) { val id = name.split(" ").head //.stripPrefix("hg19_ensGene_") val geneID = geneRegex.findFirstIn(name).getOrElse("unknown_gene") 
allGenes.add(geneID) @@ -195,15 +195,13 @@ object SageCreateLibrary extends ToolCommand { } } - def getTags(name: String, seq: DNASequence): TagResult = { + def getTags(name: String, seq: DNASequence, tagRegex: Regex): TagResult = { val allTags: List[String] = for (tag <- tagRegex.findAllMatchIn(seq.getSequenceAsString).toList) yield tag.toString() val firstTag = if (allTags.isEmpty) null else allTags.last val allAntiTags: List[String] = for (tag <- tagRegex.findAllMatchIn(seq.getReverseComplement.getSequenceAsString).toList) yield tag.toString() val firstAntiTag = if (allAntiTags.isEmpty) null else allAntiTags.head val result = new TagResult(firstTag, allTags, firstAntiTag, allAntiTags) - addTagresultToTaglib(name, result) - result } } \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCounts.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCounts.scala index a1fedb5bfbec080de1e2976ce766313e6c319249..a156c1adb483c6fedfa2eb40de6314d35289ecb8 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCounts.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCounts.scala @@ -37,7 +37,7 @@ class SageCreateTagCounts(val root: Configurable) extends ToolCommandFuntion { @Output(doc = "Sense count file", shortName = "sense", required = true) var countSense: File = _ - @Output(doc = "Sense all coun filet", shortName = "allsense", required = true) + @Output(doc = "Sense all count file", shortName = "allsense", required = true) var countAllSense: File = _ @Output(doc = "AntiSense count file", shortName = "antisense", required = true) @@ -148,9 +148,18 @@ object SageCreateTagCounts extends ToolCommand { writer.close() } } - writeFile(commandArgs.countSense, senseCounts) - writeFile(commandArgs.countAllSense, allSenseCounts) - writeFile(commandArgs.countAntiSense, antiSenseCounts) - 
writeFile(commandArgs.countAllAntiSense, allAntiSenseCounts) + + if (commandArgs.countSense != null) { + writeFile(commandArgs.countSense, senseCounts) + } + if (commandArgs.countAllAntiSense != null) { + writeFile(commandArgs.countAllAntiSense, allAntiSenseCounts) + } + if (commandArgs.countAllSense != null) { + writeFile(commandArgs.countAllSense, allSenseCounts) + } + if (commandArgs.countAntiSense != null) { + writeFile(commandArgs.countAntiSense, antiSenseCounts) + } } } \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala index 0f0e11c62ac8f935a064b5b3b042d98527e48934..879e489fd2671504b1df2ddbac11feaea27bf6da 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala @@ -15,7 +15,7 @@ */ package nl.lumc.sasc.biopet.tools -import java.io.File +import java.io.{ PrintWriter, File } import nl.lumc.sasc.biopet.core.ToolCommand import nl.lumc.sasc.biopet.utils.ConfigUtils._ @@ -27,12 +27,15 @@ import scala.io.Source * This tool can convert a tsv to a json file */ object SamplesTsvToJson extends ToolCommand { - case class Args(inputFiles: List[File] = Nil) extends AbstractArgs + case class Args(inputFiles: List[File] = Nil, outputFile: Option[File] = None) extends AbstractArgs class OptParser extends AbstractOptParser { opt[File]('i', "inputFiles") required () unbounded () valueName "<file>" action { (x, c) => c.copy(inputFiles = x :: c.inputFiles) } text "Input must be a tsv file, first line is seen as header and must at least have a 'sample' column, 'library' column is optional, multiple files allowed" + opt[File]('o', "outputFile") unbounded () valueName "<file>" action { (x, c) => + c.copy(outputFile = Some(x)) + } } /** Executes SamplesTsvToJson */ @@ -40,41 +43,53 
@@ object SamplesTsvToJson extends ToolCommand { val argsParser = new OptParser val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) - val fileMaps = for (inputFile <- commandArgs.inputFiles) yield { - val reader = Source.fromFile(inputFile) - val lines = reader.getLines().toList.filter(!_.isEmpty) - val header = lines.head.split("\t") - val sampleColumn = header.indexOf("sample") - val libraryColumn = header.indexOf("library") - if (sampleColumn == -1) throw new IllegalStateException("Sample column does not exist in: " + inputFile) + val jsonString = stringFromInputs(commandArgs.inputFiles) + commandArgs.outputFile match { + case Some(file) => { + val writer = new PrintWriter(file) + writer.println(jsonString) + writer.close() + } + case _ => println(jsonString) + } + } - val sampleLibCache: mutable.Set[(String, Option[String])] = mutable.Set() + def mapFromFile(inputFile: File): Map[String, Any] = { + val reader = Source.fromFile(inputFile) + val lines = reader.getLines().toList.filter(!_.isEmpty) + val header = lines.head.split("\t") + val sampleColumn = header.indexOf("sample") + val libraryColumn = header.indexOf("library") + if (sampleColumn == -1) throw new IllegalStateException("Sample column does not exist in: " + inputFile) - val librariesValues: List[Map[String, Any]] = for (tsvLine <- lines.tail) yield { - val values = tsvLine.split("\t") - require(header.length == values.length, "Number of columns is not the same as the header") - val sample = values(sampleColumn) - val library = if (libraryColumn != -1) Some(values(libraryColumn)) else None + val sampleLibCache: mutable.Set[(String, Option[String])] = mutable.Set() - //FIXME: this is a workaround, should be removed after fixing #180 - if (sample.head.isDigit || library.forall(_.head.isDigit)) - throw new IllegalStateException("Sample or library may not start with a number") + val librariesValues: List[Map[String, Any]] = for (tsvLine <- lines.tail) yield { + val values = 
tsvLine.split("\t") + require(header.length == values.length, "Number of columns is not the same as the header") + val sample = values(sampleColumn) + val library = if (libraryColumn != -1) Some(values(libraryColumn)) else None - if (sampleLibCache.contains((sample, library))) - throw new IllegalStateException(s"Combination of $sample and $library is found multiple times") - else sampleLibCache.add((sample, library)) - val valuesMap = (for ( - t <- 0 until values.size if !values(t).isEmpty && t != sampleColumn && t != libraryColumn - ) yield header(t) -> values(t)).toMap - library match { - case Some(lib) => Map("samples" -> Map(sample -> Map("libraries" -> Map(library -> valuesMap)))) - case _ => Map("samples" -> Map(sample -> valuesMap)) - } + //FIXME: this is a workaround, should be removed after fixing #180 + if (sample.head.isDigit || library.forall(_.head.isDigit)) + throw new IllegalStateException("Sample or library may not start with a number") + + if (sampleLibCache.contains((sample, library))) + throw new IllegalStateException(s"Combination of $sample ${library.map("and " + _).getOrElse("")} is found multiple times") + else sampleLibCache.add((sample, library)) + val valuesMap = (for ( + t <- 0 until values.size if !values(t).isEmpty && t != sampleColumn && t != libraryColumn + ) yield header(t) -> values(t)).toMap + library match { + case Some(lib) => Map("samples" -> Map(sample -> Map("libraries" -> Map(lib -> valuesMap)))) + case _ => Map("samples" -> Map(sample -> valuesMap)) } - librariesValues.foldLeft(Map[String, Any]())((acc, kv) => mergeMaps(acc, kv)) } - val map = fileMaps.foldLeft(Map[String, Any]())((acc, kv) => mergeMaps(acc, kv)) - val json = mapToJson(map) - println(json.spaces2) + librariesValues.foldLeft(Map[String, Any]())((acc, kv) => mergeMaps(acc, kv)) + } + + def stringFromInputs(inputs: List[File]): String = { + val map = inputs.map(f => mapFromFile(f)).foldLeft(Map[String, Any]())((acc, kv) => mergeMaps(acc, kv)) + 
mapToJson(map).spaces2 } } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala index fea284418da2e69cf99fdff0566769afdb93af5f..148fb4341941acc0df32b1efdc1d997428a6af61 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala @@ -15,7 +15,7 @@ */ package nl.lumc.sasc.biopet.tools -import java.io.File +import java.io.{ PrintWriter, File } import htsjdk.samtools.fastq.{ FastqReader, FastqRecord } import nl.lumc.sasc.biopet.core.config.Configurable @@ -45,7 +45,7 @@ class SeqStat(val root: Configurable) extends ToolCommandFuntion with Summarizab override def defaultCoreMemory = 2.5 - override def cmdLine = super.cmdLine + required("-i", input) + " > " + required(output) + override def cmdLine = super.cmdLine + required("-i", input) + required("-o", output) def summaryStats: Map[String, Any] = { val map = ConfigUtils.fileToConfigMap(output) @@ -108,7 +108,7 @@ object SeqStat extends ToolCommand { private var baseQualHistoMap: mutable.Map[Int, Long] = mutable.Map(0 -> 0) private var readQualHistoMap: mutable.Map[Int, Long] = mutable.Map(0 -> 0) - case class Args(fastq: File = new File("")) extends AbstractArgs + case class Args(fastq: File = null, outputJson: Option[File] = None) extends AbstractArgs class OptParser extends AbstractOptParser { @@ -117,11 +117,14 @@ object SeqStat extends ToolCommand { |$commandName - Summarize FastQ """.stripMargin) - opt[File]('i', "fastq") required () valueName "<fastq>" action { (x, c) => + opt[File]('i', "fastq") required () unbounded () valueName "<fastq>" action { (x, c) => c.copy(fastq = x) } validate { x => if (x.exists) success else failure("FASTQ file not found") } text "FastQ file to generate stats from" + opt[File]('o', "output") unbounded () valueName "<json>" action { (x, c) => + 
c.copy(outputJson = Some(x)) + } text "File to write output to, if not supplied output go to stdout" } /** @@ -317,6 +320,13 @@ object SeqStat extends ToolCommand { )) ) - println(ConfigUtils.mapToJson(report)) + commandArgs.outputJson match { + case Some(file) => { + val writer = new PrintWriter(file) + writer.println(ConfigUtils.mapToJson(report)) + writer.close() + } + case _ => println(ConfigUtils.mapToJson(report)) + } } } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SummaryToTsv.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SummaryToTsv.scala index 398a9b73f733963960f7aa5ee55414a086d6127d..cab4992805ff336f7bcc74c39c8241e163132a84 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SummaryToTsv.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SummaryToTsv.scala @@ -15,7 +15,8 @@ */ package nl.lumc.sasc.biopet.tools -import java.io.File +import java.io.{ PrintWriter, File } +import java.nio.file.Paths import nl.lumc.sasc.biopet.core.ToolCommand import nl.lumc.sasc.biopet.core.summary.Summary @@ -35,15 +36,27 @@ object SummaryToTsv extends ToolCommand { opt[File]('s', "summary") required () unbounded () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(summary = x) } - opt[File]('o', "output") maxOccurs 1 unbounded () valueName "<file>" action { (x, c) => + opt[File]('o', "outputFile") unbounded () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(outputFile = Some(x)) } - opt[String]('p', "path") required () unbounded () valueName "<value>" action { (x, c) => + opt[String]('p', "path") required () unbounded () valueName "<string>" action { (x, c) => c.copy(values = c.values ::: x :: Nil) - } + } text + """ + |String that determines the values extracted from the summary. Should be of the format: + |<header_name>=<namespace>:<lower_namespace>:<even_lower_namespace>... 
+ """.stripMargin opt[String]('m', "mode") maxOccurs 1 unbounded () valueName "<root|sample|lib>" action { (x, c) => c.copy(mode = x) - } + } validate { + x => if (Set("root", "sample", "lib").contains(x)) success else failure("Unsupported mode") + } text + """ + |Determines on what level to aggregate data. + |root: at the root level + |sample: at the sample level + |lib: at the library level + """.stripMargin } @@ -56,14 +69,23 @@ object SummaryToTsv extends ToolCommand { val paths = cmdArgs.values.map(x => { val split = x.split("=", 2) split(0) -> split(1).split(":") - }) + }).toMap - val values = fetchValues(summary, paths.toMap, sample = cmdArgs.mode == "sample", lib = cmdArgs.mode == "lib") + val values = fetchValues(summary, paths, sample = cmdArgs.mode == "sample", lib = cmdArgs.mode == "lib") - println(paths.map(_._1).mkString("\t", "\t", "")) - - for (lineId <- values.head._2.keys) { - println(paths.map(x => values(x._1)(lineId).getOrElse("")).mkString(lineId + "\t", "\t", "")) + cmdArgs.outputFile match { + case Some(file) => { + val writer = new PrintWriter(file) + writer.println(createHeader(paths)) + for (lineId <- values.head._2.keys) + writer.println(createLine(paths, values, lineId)) + writer.close() + } + case _ => { + println(createHeader(paths)) + for (lineId <- values.head._2.keys) + println(createLine(paths, values, lineId)) + } } } @@ -71,9 +93,19 @@ object SummaryToTsv extends ToolCommand { sample: Boolean = false, lib: Boolean = false) = { for ((name, path) <- paths) yield name -> { - if (lib) summary.getLibraryValues(path: _*).map(a => (a._1._1 + "-" + a._1._2) -> a._2) - else if (sample) summary.getSampleValues(path: _*) + if (lib) { + summary.getLibraryValues(path: _*).map(a => (a._1._1 + "-" + a._1._2) -> a._2) + } else if (sample) summary.getSampleValues(path: _*) else Map("value" -> summary.getValue(path: _*)) } } + + def createHeader(paths: Map[String, Array[String]]): String = { + paths.map(_._1).mkString("\t", "\t", "") + } + + def 
createLine(paths: Map[String, Array[String]], + values: Map[String, Map[String, Option[Any]]], lineId: String): String = { + paths.map(x => values(x._1)(lineId).getOrElse("")).mkString(lineId + "\t", "\t", "") + } } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala index 91dee40a1aa1cf7b32a2cc6a3a0f88eee76bfc99..0ca7ea04255d49bde04f8f09d3dfb51ffe31c1fa 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala @@ -56,7 +56,7 @@ class VcfFilter(val root: Configurable) extends ToolCommandFuntion { object VcfFilter extends ToolCommand { /** Container class for a trio */ - protected case class Trio(child: String, father: String, mother: String) { + protected[tools] case class Trio(child: String, father: String, mother: String) { def this(arg: String) = { this(arg.split(":")(0), arg.split(":")(1), arg.split(":")(2)) } @@ -278,7 +278,7 @@ object VcfFilter extends ToolCommand { } /** - * Checks if AD genotype field have a minimal value + * Checks if non-ref AD genotype field have a minimal value * @param record VCF record * @param minAlternateDepth minimal depth * @param minSamplesPass Minimal number of samples to pass filter diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala index 17e72add93db63944565f1834a02516032db121d..c1b4ca49c0647844d3d5cbd27e483d01b925e71e 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala @@ -136,44 +136,64 @@ object VcfStats extends ToolCommand { generalWiggle: List[String] = Nil, genotypeWiggle: List[String] = Nil) extends AbstractArgs + private val 
generalWiggleOptions = List("Total", "Biallelic", "ComplexIndel", "Filtered", "FullyDecoded", "Indel", "Mixed", + "MNP", "MonomorphicInSamples", "NotFiltered", "PointEvent", "PolymorphicInSamples", + "SimpleDeletion", "SimpleInsertion", "SNP", "StructuralIndel", "Symbolic", + "SymbolicOrSV", "Variant") + + private val genotypeWiggleOptions = List("Total", "Het", "HetNonRef", "Hom", "HomRef", "HomVar", "Mixed", "NoCall", "NonInformative", + "Available", "Called", "Filtered", "Variant") + /** Parsing commandline arguments */ class OptParser extends AbstractOptParser { - opt[File]('I', "inputFile") required () unbounded () valueName "<file>" action { (x, c) => + opt[File]('I', "inputFile") required () unbounded () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(inputFile = x) - } - opt[File]('R', "referenceFile") required () unbounded () valueName "<file>" action { (x, c) => + } validate { + x => if (x.exists) success else failure("Input VCF required") + } text "Input VCF file (required)" + opt[File]('R', "referenceFile") required () unbounded () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(referenceFile = x) - } - opt[File]('o', "outputDir") required () unbounded () valueName "<file>" action { (x, c) => + } validate { + x => if (x.exists) success else failure("Reference file required") + } text "Fasta reference which was used to call input VCF (required)" + opt[File]('o', "outputDir") required () unbounded () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(outputDir = x) - } + } validate { + x => if (x == null) failure("Output directory required") else success + } text "Path to directory for output (required)" opt[File]('i', "intervals") unbounded () valueName ("<file>") action { (x, c) => c.copy(intervals = Some(x)) - } + } text "Path to interval (BED) file (optional)" opt[String]("infoTag") unbounded () valueName "<tag>" action { (x, c) => c.copy(infoTags = x :: c.infoTags) - } + } text "Summarize these info tags. 
Default is all tags" opt[String]("genotypeTag") unbounded () valueName "<tag>" action { (x, c) => c.copy(genotypeTags = x :: c.genotypeTags) - } + } text "Summarize these genotype tags. Default is all tags" opt[Unit]("allInfoTags") unbounded () action { (x, c) => c.copy(allInfoTags = true) - } + } text "Summarize all info tags. Default false" opt[Unit]("allGenotypeTags") unbounded () action { (x, c) => c.copy(allGenotypeTags = true) - } + } text "Summarize all genotype tags. Default false" opt[Int]("binSize") unbounded () action { (x, c) => c.copy(binSize = x) - } + } text "Binsize in estimated base pairs" opt[Unit]("writeBinStats") unbounded () action { (x, c) => c.copy(writeBinStats = true) - } + } text "Write bin statistics. Default False" opt[String]("generalWiggle") unbounded () action { (x, c) => c.copy(generalWiggle = x :: c.generalWiggle, writeBinStats = true) - } + } validate { + x => if (generalWiggleOptions.contains(x)) success else failure(s"""Nonexistent field $x""") + } text s"""Create a wiggle track with bin size <binSize> for any of the following statistics: + |${generalWiggleOptions.mkString(", ")}""".stripMargin opt[String]("genotypeWiggle") unbounded () action { (x, c) => c.copy(genotypeWiggle = x :: c.genotypeWiggle, writeBinStats = true) - } + } validate { + x => if (genotypeWiggleOptions.contains(x)) success else failure(s"""Non-existent field $x""") + } text s"""Create a wiggle track with bin size <binSize> for any of the following genotype fields: + |${genotypeWiggleOptions.mkString(", ")}""".stripMargin } /** @@ -481,7 +501,7 @@ object VcfStats extends ToolCommand { } /** Function to check all general stats, all info expect sample/genotype specific stats */ - protected def checkGeneral(record: VariantContext, additionalTags: List[String]): Map[String, Map[String, Map[Any, Int]]] = { + protected[tools] def checkGeneral(record: VariantContext, additionalTags: List[String]): Map[String, Map[String, Map[Any, Int]]] = { val buffer = 
mutable.Map[String, Map[Any, Int]]() def addToBuffer(key: String, value: Any, found: Boolean): Unit = { @@ -490,7 +510,7 @@ object VcfStats extends ToolCommand { else buffer += key -> (map + (value -> map.getOrElse(value, 0))) } - buffer += "QUAL" -> Map(record.getPhredScaledQual -> 1) + buffer += "QUAL" -> Map(Math.round(record.getPhredScaledQual) -> 1) addToBuffer("SampleDistribution-Het", record.getGenotypes.count(genotype => genotype.isHet), found = true) addToBuffer("SampleDistribution-HetNonRef", record.getGenotypes.count(genotype => genotype.isHetNonRef), found = true) @@ -537,7 +557,7 @@ object VcfStats extends ToolCommand { } /** Function to check sample/genotype specific stats */ - protected def checkGenotype(record: VariantContext, genotype: Genotype, additionalTags: List[String]): Map[String, Map[String, Map[Any, Int]]] = { + protected[tools] def checkGenotype(record: VariantContext, genotype: Genotype, additionalTags: List[String]): Map[String, Map[String, Map[Any, Int]]] = { val buffer = mutable.Map[String, Map[Any, Int]]() def addToBuffer(key: String, value: Any, found: Boolean): Unit = { diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala index 42e0d55ad5d9de3fab8d2601d5605b0941939b9e..7a89433949ebd5d042a60cb51c25f7ed130e599f 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala @@ -16,15 +16,18 @@ package nl.lumc.sasc.biopet.tools import java.io.File +import java.util -import htsjdk.variant.variantcontext.VariantContextBuilder +import htsjdk.variant.variantcontext.{ VariantContext, VariantContextBuilder } import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder } import htsjdk.variant.vcf._ import nl.lumc.sasc.biopet.core.{ ToolCommandFuntion, 
ToolCommand } import nl.lumc.sasc.biopet.core.config.Configurable +import nl.lumc.sasc.biopet.utils.VcfUtils.scalaListToJavaObjectArrayList import org.broadinstitute.gatk.utils.commandline.{ Output, Input } import scala.collection.JavaConversions._ +import scala.collection.JavaConverters._ /** * Biopet extension for tool VcfWithVcf @@ -99,7 +102,7 @@ object VcfWithVcf extends ToolCommand { | By default we will return all values found for a given field. | With <method> the values will processed after getting it from the secondary VCF file, posible methods are: | - max : takes maximum of found value, only works for numeric (integer/float) fields - | - min : takes minemal of found value, only works for numeric (integer/float) fields + | - min : takes minimum of found value, only works for numeric (integer/float) fields | - unique: takes only unique values """.stripMargin opt[Boolean]("match") valueName "<Boolean>" maxOccurs 1 action { (x, c) => c.copy(matchAllele = x) @@ -124,7 +127,7 @@ object VcfWithVcf extends ToolCommand { for (x <- commandArgs.fields) { if (header.hasInfoLine(x.outputField)) - throw new IllegalArgumentException("Field '" + x.outputField + "' already exist in input vcf") + throw new IllegalArgumentException("Field '" + x.outputField + "' already exists in input vcf") if (!secondHeader.hasInfoLine(x.inputField)) throw new IllegalArgumentException("Field '" + x.inputField + "' does not exist in secondary vcf") @@ -140,44 +143,11 @@ object VcfWithVcf extends ToolCommand { var counter = 0 for (record <- reader) { - val secondaryRecords = if (commandArgs.matchAllele) { - secondaryReader.query(record.getContig, record.getStart, record.getEnd).toList. 
- filter(x => record.getAlternateAlleles.exists(x.hasAlternateAllele)) - } else { - secondaryReader.query(record.getContig, record.getStart, record.getEnd).toList - } + val secondaryRecords = getSecondaryRecords(secondaryReader, record, commandArgs.matchAllele) - val fieldMap = (for ( - f <- commandArgs.fields if secondaryRecords.exists(_.hasAttribute(f.inputField)) - ) yield { - f.outputField -> (for ( - secondRecord <- secondaryRecords if secondRecord.hasAttribute(f.inputField) - ) yield { - secondRecord.getAttribute(f.inputField) match { - case l: List[_] => l - case x => List(x) - } - }).fold(Nil)(_ ::: _) - }).toMap - - writer.add(fieldMap.foldLeft(new VariantContextBuilder(record))((builder, attribute) => { - builder.attribute(attribute._1, commandArgs.fields.filter(_.outputField == attribute._1).head.fieldMethod match { - case FieldMethod.max => - header.getInfoHeaderLine(attribute._1).getType match { - case VCFHeaderLineType.Integer => Array(attribute._2.map(_.toString.toInt).max) - case VCFHeaderLineType.Float => Array(attribute._2.map(_.toString.toFloat).max) - case _ => throw new IllegalArgumentException("Type of field " + attribute._1 + " is not numeric") - } - case FieldMethod.min => - header.getInfoHeaderLine(attribute._1).getType match { - case VCFHeaderLineType.Integer => Array(attribute._2.map(_.toString.toInt).min) - case VCFHeaderLineType.Float => Array(attribute._2.map(_.toString.toFloat).min) - case _ => throw new IllegalArgumentException("Type of field " + attribute._1 + " is not numeric") - } - case FieldMethod.unique => attribute._2.distinct.toArray - case _ => attribute._2.toArray - }) - }).make()) + val fieldMap = createFieldMap(commandArgs.fields, secondaryRecords) + + writer.add(createRecord(fieldMap, record, commandArgs.fields, header)) counter += 1 if (counter % 100000 == 0) { @@ -192,4 +162,69 @@ object VcfWithVcf extends ToolCommand { secondaryReader.close() logger.info("Done") } + + /** + * Create Map of field -> List of attributes 
in secondary records + * @param fields List of Field + * @param secondaryRecords List of VariantContext with secondary records + * @return Map of fields and their values in secondary records + */ + def createFieldMap(fields: List[Fields], secondaryRecords: List[VariantContext]): Map[String, List[Any]] = { + val fieldMap = (for ( + f <- fields if secondaryRecords.exists(_.hasAttribute(f.inputField)) + ) yield { + f.outputField -> (for ( + secondRecord <- secondaryRecords if secondRecord.hasAttribute(f.inputField) + ) yield { + secondRecord.getAttribute(f.inputField) match { + case l: List[_] => l + case y: util.ArrayList[_] => y.toList + case x => List(x) + } + }).fold(Nil)(_ ::: _) + }).toMap + fieldMap + } + + /** + * Get secondary records matching the query record + * @param secondaryReader reader for secondary records + * @param record query record + * @param matchAllele allele has to match query allele? + * @return List of VariantContext + */ + def getSecondaryRecords(secondaryReader: VCFFileReader, + record: VariantContext, matchAllele: Boolean): List[VariantContext] = { + if (matchAllele) { + secondaryReader.query(record.getContig, record.getStart, record.getEnd).toList. 
+ filter(x => record.getAlternateAlleles.exists(x.hasAlternateAllele)) + } else { + secondaryReader.query(record.getContig, record.getStart, record.getEnd).toList + } + } + + def createRecord(fieldMap: Map[String, List[Any]], record: VariantContext, + fields: List[Fields], header: VCFHeader): VariantContext = { + fieldMap.foldLeft(new VariantContextBuilder(record))((builder, attribute) => { + builder.attribute(attribute._1, fields.filter(_.outputField == attribute._1).head.fieldMethod match { + case FieldMethod.max => + header.getInfoHeaderLine(attribute._1).getType match { + case VCFHeaderLineType.Integer => scalaListToJavaObjectArrayList(List(attribute._2.map(_.toString.toInt).max)) + case VCFHeaderLineType.Float => scalaListToJavaObjectArrayList(List(attribute._2.map(_.toString.toFloat).max)) + case _ => throw new IllegalArgumentException("Type of field " + attribute._1 + " is not numeric") + } + case FieldMethod.min => + header.getInfoHeaderLine(attribute._1).getType match { + case VCFHeaderLineType.Integer => scalaListToJavaObjectArrayList(List(attribute._2.map(_.toString.toInt).min)) + case VCFHeaderLineType.Float => scalaListToJavaObjectArrayList(List(attribute._2.map(_.toString.toFloat).min)) + case _ => throw new IllegalArgumentException("Type of field " + attribute._1 + " is not numeric") + } + case FieldMethod.unique => scalaListToJavaObjectArrayList(attribute._2.distinct) + case _ => { + print(attribute._2.getClass.toString) + scalaListToJavaObjectArrayList(attribute._2) + } + }) + }).make() + } } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala index 030ca0347993ecc565373eaf98114a5966a1a535..2518113bac32bf4f1a58a102d16d49bb14ae63f7 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala 
@@ -50,7 +50,7 @@ class VepNormalizer(val root: Configurable) extends ToolCommandFuntion { var mode: String = config("mode", default = "standard") var doNotRemove: Boolean = config("do_not_remove", default = false) - override def defaultCoreMemory = 1.0 + override def defaultCoreMemory = 4.0 override def cmdLine = super.cmdLine + required("-I", inputVCF) + diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala index 9074ff1f3bcc71fda5ad5c8aa9ba5ae5867fc6c8..8e375f4e7e35cbb49c9cc90c688753b2b6ca42ea 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala @@ -15,6 +15,8 @@ */ package nl.lumc.sasc.biopet.utils +import java.util + import htsjdk.variant.variantcontext.VariantContext import scala.collection.JavaConversions._ @@ -43,4 +45,39 @@ object VcfUtils { def fillAllele(bases: String, newSize: Int, fillWith: Char = '-'): String = { bases + Array.fill[Char](newSize - bases.length)(fillWith).mkString } + + /** + * Stands for scalaListToJavaObjectArrayList + * Convert a scala List[Any] to a java ArrayList[Object]. This is necessary for BCF conversions + * As scala ints and floats cannot be directly cast to java objects (they aren't objects), + * we need to box them. + * For items not Int, Float or Object, we assume them to be strings (TODO: sane assumption?) 
+ * @param array scala List[Any] + * @return converted java ArrayList[Object] + */ + def scalaListToJavaObjectArrayList(array: List[Any]): util.ArrayList[Object] = { + val out = new util.ArrayList[Object]() + + array.foreach { + case x: Long => out.add(Long.box(x)) + case x: Int => out.add(Int.box(x)) + case x: Char => out.add(Char.box(x)) + case x: Byte => out.add(Byte.box(x)) + case x: Double => out.add(Double.box(x)) + case x: Float => out.add(Float.box(x)) + case x: Boolean => out.add(Boolean.box(x)) + case x: String => out.add(x) + case x: Object => out.add(x) + case x => out.add(x.toString) + } + out + } + + //TODO: Add genotype comparing to this function + def identicalVariantContext(var1: VariantContext, var2: VariantContext): Boolean = { + var1.getContig == var2.getContig && + var1.getStart == var2.getStart && + var1.getEnd == var2.getEnd && + var1.getAttributes == var2.getAttributes + } } diff --git a/public/biopet-framework/src/test/resources/chrQ2.vcf b/public/biopet-framework/src/test/resources/chrQ2.vcf new file mode 100644 index 0000000000000000000000000000000000000000..e49f468d7a6d54de23ed5e3d118d45a663c1cb63 --- /dev/null +++ b/public/biopet-framework/src/test/resources/chrQ2.vcf @@ -0,0 +1,85 @@ +##fileformat=VCFv4.1 +##reference=file:///data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta +##UnifiedGenotyperCommandLine=<ID=ApplyRecalibration,Version=3.1-1-g07a4bf8,Date="Sat Jun 14 16:58:07 CEST 2014",Epoch=1402757887567,CommandLineOptions="analysis_type=ApplyRecalibration input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null 
downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false input=[(RodBinding name=input source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/Child_7006504.ug.chrom_merged.vcf)] recal_file=(RodBinding name=recal_file source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.snp.recal) tranches_file=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.snp.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=99.0 lodCutoff=null ignore_filter=null excludeFiltered=false mode=SNP filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false"> +##UnifiedGenotyperCommandLine=<ID=ApplyRecalibration,Version=3.1-1-g07a4bf8,Date="Sat Jun 14 17:01:08 CEST 
2014",Epoch=1402758068552,CommandLineOptions="analysis_type=ApplyRecalibration input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false input=[(RodBinding name=input source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.snp.recalibrated.vcf)] recal_file=(RodBinding name=recal_file source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.indel.recal) tranches_file=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.indel.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub 
no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=99.0 lodCutoff=null ignore_filter=null excludeFiltered=false mode=INDEL filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false"> +##UnifiedGenotyperCommandLine=<ID=UnifiedGenotyper,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Child_7006504.ready.bam, /data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Mother_7006508.ready.bam, /data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Father_7006506.ready.bam] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=[chrM] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] 
pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false genotype_likelihoods_model=BOTH pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=LOGLESS_CACHING min_base_quality_score=17 max_deletion_fraction=0.05 allSitePLs=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false output_mode=EMIT_VARIANTS_ONLY heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=20.0 standard_min_confidence_threshold_for_emitting=20.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null dbsnp=(RodBinding name=dbsnp source=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/dbsnp_137.hg19_nohap.vcf) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub onlyEmitSamples=[] debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false",Date="Sat Jun 14 15:20:24 CEST 
2014",Epoch=1402752024377,Version=3.1-1-g07a4bf8> +##HaplotypeCallerCommandLine=<ID=ApplyRecalibration,Version=3.1-1-g07a4bf8,Date="Sat Jun 14 22:28:02 CEST 2014",Epoch=1402777682364,CommandLineOptions="analysis_type=ApplyRecalibration input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false input=[(RodBinding name=input source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/Child_7006504.hc.chrom_merged.vcf)] recal_file=(RodBinding name=recal_file source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.snp.recal) 
tranches_file=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.snp.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=99.0 lodCutoff=null ignore_filter=null excludeFiltered=false mode=SNP filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false"> +##HaplotypeCallerCommandLine=<ID=ApplyRecalibration,Version=3.1-1-g07a4bf8,Date="Sat Jun 14 22:31:13 CEST 2014",Epoch=1402777873043,CommandLineOptions="analysis_type=ApplyRecalibration input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT 
allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false input=[(RodBinding name=input source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.snp.recalibrated.vcf)] recal_file=(RodBinding name=recal_file source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.indel.recal) tranches_file=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.indel.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=99.0 lodCutoff=null ignore_filter=null excludeFiltered=false mode=INDEL filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false"> +##HaplotypeCallerCommandLine=<ID=HaplotypeCaller,CommandLineOptions="analysis_type=HaplotypeCaller input_file=[/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Child_7006504.ready.bam, /data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Mother_7006508.ready.bam, /data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Father_7006506.ready.bam] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=[chrM] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 
baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub likelihoodCalculationEngine=PairHMM heterogeneousKmerSizeResolution=COMBO_MIN graphOutput=null bamOutput=null bam_compression=null disable_bam_indexing=null generate_md5=null simplifyBAM=null bamWriterType=CALLED_HAPLOTYPES dbsnp=(RodBinding name=dbsnp source=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/dbsnp_137.hg19_nohap.vcf) dontTrimActiveRegions=false maxDiscARExtension=25 maxGGAARExtension=300 paddingAroundIndels=150 paddingAroundSNPs=20 comp=[] annotation=[ClippingRankSumTest, DepthPerSampleHC] excludeAnnotation=[SpanningDeletions, TandemRepeatAnnotator] heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=20.0 standard_min_confidence_threshold_for_emitting=20.0 alleles=(RodBinding name= source=UNBOUND) 
max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null kmerSize=[10, 25] dontIncreaseKmerSizesForCycles=false numPruningSamples=1 recoverDanglingHeads=false dontRecoverDanglingTails=false consensus=false emitRefConfidence=NONE GVCFGQBands=[5, 20, 60] indelSizeToEliminateInRefModel=10 min_base_quality_score=10 minPruning=2 gcpHMM=10 includeUmappedReads=false useAllelesTrigger=false useFilteredReadsForAnnotations=false phredScaledGlobalReadMismappingRate=45 maxNumHaplotypesInPopulation=128 mergeVariantsViaLD=false pair_hmm_implementation=LOGLESS_CACHING keepRG=null justDetermineActiveRegions=false dontGenotype=false errorCorrectKmers=false debug=false debugGraphTransformations=false dontUseSoftClippedBases=false captureAssemblyFailureBAM=false allowCyclesInKmerGraphToGeneratePaths=false noFpga=false errorCorrectReads=false kmerLengthForReadErrorCorrection=25 minObservationsForKmerToBeSolid=20 pcr_indel_model=CONSERVATIVE activityProfileOut=null activeRegionOut=null activeRegionIn=null activeRegionExtension=null forceActive=false activeRegionMaxSize=null bandPassSigma=null min_mapping_quality_score=20 filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false",Date="Sat Jun 14 15:26:18 CEST 2014",Epoch=1402752378803,Version=3.1-1-g07a4bf8> +##INFO=<ID=DN,Number=1,Type=Integer,Description="inDbSNP"> +##INFO=<ID=DT,Number=0,Type=Flag,Description="in1000Genomes"> +##INFO=<ID=DA,Number=1,Type=String,Description="allelesDBSNP"> +##INFO=<ID=FG,Number=.,Type=String,Description="functionGVS"> +##INFO=<ID=FD,Number=.,Type=String,Description="functionDBSNP"> +##INFO=<ID=GM,Number=.,Type=String,Description="accession"> +##INFO=<ID=GL,Number=.,Type=String,Description="geneList"> +##INFO=<ID=AAC,Number=.,Type=String,Description="aminoAcids"> +##INFO=<ID=PP,Number=.,Type=String,Description="proteinPosition"> 
+##INFO=<ID=CDP,Number=.,Type=String,Description="cDNAPosition"> +##INFO=<ID=PH,Number=.,Type=String,Description="polyPhen"> +##INFO=<ID=CP,Number=1,Type=String,Description="scorePhastCons"> +##INFO=<ID=CG,Number=1,Type=String,Description="consScoreGERP"> +##INFO=<ID=AA,Number=1,Type=String,Description="chimpAllele"> +##INFO=<ID=CN,Number=.,Type=String,Description="CNV"> +##INFO=<ID=HA,Number=1,Type=String,Description="AfricanHapMapFreq"> +##INFO=<ID=HE,Number=1,Type=String,Description="EuropeanHapMapFreq"> +##INFO=<ID=HC,Number=1,Type=String,Description="AsianHapMapFreq"> +##INFO=<ID=DG,Number=0,Type=Flag,Description="hasGenotypes"> +##INFO=<ID=DV,Number=.,Type=String,Description="dbSNPValidation"> +##INFO=<ID=RM,Number=.,Type=String,Description="repeatMasker"> +##INFO=<ID=RT,Number=.,Type=String,Description="tandemRepeat"> +##INFO=<ID=CA,Number=0,Type=Flag,Description="clinicalAssociation"> +##INFO=<ID=DSP,Number=1,Type=Integer,Description="distanceToSplice"> +##INFO=<ID=GS,Number=.,Type=String,Description="granthamScore"> +##INFO=<ID=MR,Number=.,Type=String,Description="microRNAs"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. 
Ref base qualities"> +##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered"> +##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?"> +##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions"> +##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval"> +##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias"> +##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes"> +##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation"> +##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed"> +##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed"> +##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality"> +##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads"> +##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. 
Ref read mapping qualities"> +##INFO=<ID=NEGATIVE_TRAIN_SITE,Number=0,Type=Flag,Description="This variant was used to build the negative training set of bad variants"> +##INFO=<ID=POSITIVE_TRAIN_SITE,Number=0,Type=Flag,Description="This variant was used to build the positive training set of good variants"> +##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth"> +##INFO=<ID=RPA,Number=.,Type=Integer,Description="Number of times tandem repeat unit is repeated, for each allele (including reference)"> +##INFO=<ID=RU,Number=1,Type=String,Description="Tandem repeat unit (bases)"> +##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias"> +##INFO=<ID=STR,Number=0,Type=Flag,Description="Variant is a short tandem repeat"> +##INFO=<ID=VQSLOD,Number=1,Type=Float,Description="Log odds ratio of being a true variant versus being false under the trained gaussian mixture model"> +##INFO=<ID=culprit,Number=1,Type=String,Description="The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out"> +##INFO=<ID=ClippingRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. 
Ref number of hard clipped bases"> +##INFO=<ID=GATKCaller,Number=.,Type=String,Description="GATK variant caller used to call the variant"> +##INFO=<ID=PartOfCompound,Number=.,Type=String,Description="Whether the record was originally part of a record containing compound variants"> +##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)"> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"> +##FILTER=<ID=LowQual,Description="Low quality"> +##FILTER=<ID=VQSRTrancheINDEL99.00to99.90,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -1.4714 <= x < -0.3324"> +##FILTER=<ID=VQSRTrancheINDEL99.90to100.00+,Description="Truth sensitivity tranche level for INDEL model at VQS Lod < -6.093"> +##FILTER=<ID=VQSRTrancheINDEL99.90to100.00,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -6.093 <= x < -1.4714"> +##FILTER=<ID=VQSRTrancheSNP99.00to99.90,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -4.8126 <= x < 0.2264"> +##FILTER=<ID=VQSRTrancheSNP99.90to100.00+,Description="Truth sensitivity tranche level for SNP model at VQS Lod < -39474.9285"> +##FILTER=<ID=VQSRTrancheSNP99.90to100.00,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -39474.9285 <= x < -4.8126"> +##FILTER=<ID=TooHigh1000GAF,Description="Allele frequency in 1000G is more than 5%"> +##FILTER=<ID=TooHighGoNLAF,Description="Allele frequency in 1000G is more than 5%"> +##FILTER=<ID=IndexNotCalled,Description="Position in index sample is not called"> +##FILTER=<ID=IndexIsVariant,Description="Index call is a variant"> 
+##FILTER=<ID=InArtificialChrom,Description="Variant found in an artificial chromosome"> +##FILTER=<ID=IsIntergenic,Description="Variant found in intergenic region"> +##contig=<ID=chrQ,length=16571> +##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. Format: Allele|Gene|Feature|Feature_type|Consequence|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|AA_MAF|EA_MAF|ALLELE_NUM|DISTANCE|STRAND|CLIN_SIG|SYMBOL|SYMBOL_SOURCE|GMAF|HGVSc|HGVSp|AFR_MAF|AMR_MAF|ASN_MAF|EUR_MAF|PUBMED"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Child_7006504 Father_7006506 Mother_7006508 +chrQ 50 rs199537431 T A 1541.12 PASS FG=intron;FD=unknown;GM=NM_152486.2;GL=SAMD11;CP=0.000;CG=-1.630;CN=2294,3274,30362,112930;DSP=107;AC=2;AF=0.333;AN=6;BaseQRankSum=4.068;DB;DP=124;FS=1.322;MLEAC=2;MLEAF=0.333;MQ=60.0;MQ0=0;MQRankSum=-0.197;QD=19.03;RPA=1,2;RU=A;ReadPosRankSum=-0.424;STR;VQSLOD=0.079;culprit=FS;GATKCaller=UG,HC;CSQ=A|ENSESTG00000013623|ENSESTT00000034081|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034081.1:c.306-110_306-109insA||||||,A|CCDS2.2|CCDS2.2|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|CCDS2.2:c.306-110_306-109insA||||||,A|ENSESTG00000013623|ENSESTT00000034116|Transcript|upstream_gene_variant||||||rs199537431|||1|3610|1||||A:0.0078|||||||,A|ENSESTG00000013623|ENSESTT00000034091|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034091.1:c.306-110_306-109insA||||||,A|ENSESTG00000013623|ENSESTT00000034102|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034102.1:c.29-110_29-109insA||||||,A|148398|XM_005244723.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244723.1:c.306-110_306-109insA||||||,A|148398|XM_005244724.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A
:0.0078|XM_005244724.1:c.306-110_306-109insA||||||,A|148398|XM_005244725.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244725.1:c.306-110_306-109insA||||||,A|148398|NM_152486.2|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|NM_152486.2:c.306-110_306-109insA||||||,A|148398|XM_005244727.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244727.1:c.306-110_306-109insA||||||,A|148398|XM_005244726.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244726.1:c.306-110_306-109insA|||||| GT:AD:DP:GQ:PL 0/1:24,21:45:99:838,0,889 0/1:17,19:36:99:744,0,603 0/0:42,0:43:99:0,126,1717 diff --git a/public/biopet-framework/src/test/resources/chrQ2.vcf.gz b/public/biopet-framework/src/test/resources/chrQ2.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..22435b2c513dc40a2f9632f1970395188292aa67 Binary files /dev/null and b/public/biopet-framework/src/test/resources/chrQ2.vcf.gz differ diff --git a/public/biopet-framework/src/test/resources/chrQ2.vcf.gz.tbi b/public/biopet-framework/src/test/resources/chrQ2.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..d376218edbf3aeb9bcbf9a16275c36a6005c57b2 Binary files /dev/null and b/public/biopet-framework/src/test/resources/chrQ2.vcf.gz.tbi differ diff --git a/public/biopet-framework/src/test/resources/chrQ_allN.fa b/public/biopet-framework/src/test/resources/chrQ_allN.fa new file mode 100644 index 0000000000000000000000000000000000000000..f2f89ba9c8b9bda54f666e0894e2234856aefc1b --- /dev/null +++ b/public/biopet-framework/src/test/resources/chrQ_allN.fa @@ -0,0 +1,2 @@ +>chrQ 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN diff --git a/public/biopet-framework/src/test/resources/chrQ_allN.fa.fai b/public/biopet-framework/src/test/resources/chrQ_allN.fa.fai new file mode 100644 index 0000000000000000000000000000000000000000..b7a558fdb3b3c0e85f6e3c634cc3ae80c601336d --- /dev/null +++ b/public/biopet-framework/src/test/resources/chrQ_allN.fa.fai @@ -0,0 +1 @@ +chrQ 16571 6 16571 16572 diff --git a/public/biopet-framework/src/test/resources/flagstat_crossreport.txt b/public/biopet-framework/src/test/resources/flagstat_crossreport.txt new file mode 100644 index 0000000000000000000000000000000000000000..74eabb4125ad9351ff4691a7e5cf0fa68282249c --- /dev/null +++ b/public/biopet-framework/src/test/resources/flagstat_crossreport.txt @@ -0,0 +1,15 @@ + #1 #2 #3 #4 #5 #6 #7 #8 #9 #10 #11 #12 #13 #14 +#1 1 1 0 1 0 0 0 1 1 1 0 0 0 0 +#2 1 1 0 1 0 0 0 1 1 1 0 0 0 0 +#3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +#4 1 1 0 1 0 0 0 1 1 1 0 0 0 0 +#5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +#6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +#7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +#8 1 1 0 1 0 0 0 1 1 1 0 0 0 0 +#9 1 1 0 1 0 0 0 1 1 1 0 0 0 0 +#10 1 1 0 1 0 0 0 1 1 1 0 0 0 0 +#11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +#12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +#13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +#14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 diff --git a/public/biopet-framework/src/test/resources/flagstat_crosstrue.txt b/public/biopet-framework/src/test/resources/flagstat_crosstrue.txt new file mode 100644 index 
0000000000000000000000000000000000000000..dd05aed41c50b81790742957b210b21840918826 --- /dev/null +++ b/public/biopet-framework/src/test/resources/flagstat_crosstrue.txt @@ -0,0 +1,15 @@ + #1 #2 #3 #4 #5 #6 #7 #8 #9 #10 #11 #12 #13 #14 +#1 100.0000% 100.0000% 0.0000% 100.0000% 0.0000% 0.0000% 0.0000% 100.0000% 100.0000% 100.0000% 0.0000% 0.0000% 0.0000% 0.0000% +#2 100.0000% 100.0000% 0.0000% 100.0000% 0.0000% 0.0000% 0.0000% 100.0000% 100.0000% 100.0000% 0.0000% 0.0000% 0.0000% 0.0000% +#3 NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% +#4 100.0000% 100.0000% 0.0000% 100.0000% 0.0000% 0.0000% 0.0000% 100.0000% 100.0000% 100.0000% 0.0000% 0.0000% 0.0000% 0.0000% +#5 NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% +#6 NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% +#7 NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% +#8 100.0000% 100.0000% 0.0000% 100.0000% 0.0000% 0.0000% 0.0000% 100.0000% 100.0000% 100.0000% 0.0000% 0.0000% 0.0000% 0.0000% +#9 100.0000% 100.0000% 0.0000% 100.0000% 0.0000% 0.0000% 0.0000% 100.0000% 100.0000% 100.0000% 0.0000% 0.0000% 0.0000% 0.0000% +#10 100.0000% 100.0000% 0.0000% 100.0000% 0.0000% 0.0000% 0.0000% 100.0000% 100.0000% 100.0000% 0.0000% 0.0000% 0.0000% 0.0000% +#11 NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% +#12 NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% +#13 NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% +#14 NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% diff --git a/public/biopet-framework/src/test/resources/flagstat_report.txt b/public/biopet-framework/src/test/resources/flagstat_report.txt new file mode 100644 index 0000000000000000000000000000000000000000..acbe332a82a9d7c04c71e5c9ae74275b3c75b0cc --- /dev/null +++ b/public/biopet-framework/src/test/resources/flagstat_report.txt @@ -0,0 +1,48 @@ +Number Total Flags Fraction Name +#1 1 
100.0000% All +#2 1 100.0000% Mapped +#3 0 0.0000% Duplicates +#4 1 100.0000% FirstOfPair +#5 0 0.0000% SecondOfPair +#6 0 0.0000% ReadNegativeStrand +#7 0 0.0000% NotPrimaryAlignment +#8 1 100.0000% ReadPaired +#9 1 100.0000% ProperPair +#10 1 100.0000% MateNegativeStrand +#11 0 0.0000% MateUnmapped +#12 0 0.0000% ReadFailsVendorQualityCheck +#13 0 0.0000% SupplementaryAlignment +#14 0 0.0000% SecondaryOrSupplementary + + #1 #2 #3 #4 #5 #6 #7 #8 #9 #10 #11 #12 #13 #14 +#1 1 1 0 1 0 0 0 1 1 1 0 0 0 0 +#2 1 1 0 1 0 0 0 1 1 1 0 0 0 0 +#3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +#4 1 1 0 1 0 0 0 1 1 1 0 0 0 0 +#5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +#6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +#7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +#8 1 1 0 1 0 0 0 1 1 1 0 0 0 0 +#9 1 1 0 1 0 0 0 1 1 1 0 0 0 0 +#10 1 1 0 1 0 0 0 1 1 1 0 0 0 0 +#11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +#12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +#13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +#14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + + #1 #2 #3 #4 #5 #6 #7 #8 #9 #10 #11 #12 #13 #14 +#1 100.0000% 100.0000% 0.0000% 100.0000% 0.0000% 0.0000% 0.0000% 100.0000% 100.0000% 100.0000% 0.0000% 0.0000% 0.0000% 0.0000% +#2 100.0000% 100.0000% 0.0000% 100.0000% 0.0000% 0.0000% 0.0000% 100.0000% 100.0000% 100.0000% 0.0000% 0.0000% 0.0000% 0.0000% +#3 NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% +#4 100.0000% 100.0000% 0.0000% 100.0000% 0.0000% 0.0000% 0.0000% 100.0000% 100.0000% 100.0000% 0.0000% 0.0000% 0.0000% 0.0000% +#5 NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% +#6 NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% +#7 NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% +#8 100.0000% 100.0000% 0.0000% 100.0000% 0.0000% 0.0000% 0.0000% 100.0000% 100.0000% 100.0000% 0.0000% 0.0000% 0.0000% 0.0000% +#9 100.0000% 100.0000% 0.0000% 100.0000% 0.0000% 0.0000% 0.0000% 100.0000% 100.0000% 100.0000% 0.0000% 0.0000% 0.0000% 0.0000% +#10 100.0000% 100.0000% 0.0000% 100.0000% 0.0000% 0.0000% 0.0000% 100.0000% 
100.0000% 100.0000% 0.0000% 0.0000% 0.0000% 0.0000% +#11 NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% +#12 NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% +#13 NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% +#14 NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% NaN% + diff --git a/public/biopet-framework/src/test/resources/flagstat_summary.txt b/public/biopet-framework/src/test/resources/flagstat_summary.txt new file mode 100644 index 0000000000000000000000000000000000000000..e7984915cab425c8bd94dd0a22eae60e0407f518 --- /dev/null +++ b/public/biopet-framework/src/test/resources/flagstat_summary.txt @@ -0,0 +1,16 @@ +{ + "Duplicates" : 0, + "NotPrimaryAlignment" : 0, + "All" : 1, + "ReadNegativeStrand" : 0, + "ProperPair" : 1, + "MateUnmapped" : 0, + "ReadFailsVendorQualityCheck" : 0, + "Mapped" : 1, + "SupplementaryAlignment" : 0, + "MateNegativeStrand" : 1, + "FirstOfPair" : 1, + "ReadPaired" : 1, + "SecondaryOrSupplementary" : 0, + "SecondOfPair" : 0 +} \ No newline at end of file diff --git a/public/biopet-framework/src/test/resources/mini.transcriptome.fa b/public/biopet-framework/src/test/resources/mini.transcriptome.fa new file mode 100644 index 0000000000000000000000000000000000000000..d86c34faa29af176b6dd1a5d098c16d4e618039f --- /dev/null +++ b/public/biopet-framework/src/test/resources/mini.transcriptome.fa @@ -0,0 +1,17 @@ +>ENST00000529862 havana:known chromosome:GRCh38:11:105194440:105194946:-1 gene:ENSG00000254767 gene_biotype:unprocessed_pseudogene transcript_biotype:unprocessed_pseudogene +ATGAATAATAATGGGAAATATCAACATAAGTCTTGAAAATTACTTTATTCTACTGGGTCT +TTCTAATTGACCTCCTCTGGAAATAGTTATTTTTGTAGTTCTCTTGATATTCTGCTTCAT +GACACTGATAGGCAAGCTGTTCAGCATCATTCTGTCATACCTGGACTCCCATCCCCACAC +TCTCGGTACTTATTCTCTTTTCTGGATTTCTGCTACACCATCAGTTCCATCTTTTAATTA +CAGTACAATCTCTGGGGCCCACAGAAGAACATCTCTTATGCCAGTGGTATGATTCAAATT +TATTTTGTTCTCACACTGGGAACCATGGATTGCGCTCTACTGGTGGTGATGTCCAGGACT 
+GTGATGCAGCTGGACACAGACACTTGCCTTATACTGTTGTTATGGCTGTGGCTTTTTGGG +TAAGTAGCTTTACCAACTCAGCATTTGATTCCTTTTTTACCTTCTGGGTAACCCTGTGTG +GACATCACTATTATGCTTACATCTTTA +>ENST00000528941 havana:known chromosome:GRCh38:11:105246880:105247060:-1 gene:ENSG00000255336 gene_biotype:unprocessed_pseudogene transcript_biotype:unprocessed_pseudogene +TATTCATAATTAAAGTCATACTTCAGCAAGCTGGCTTTAAATATACAACATATAATTCTT +TTAAATCAGACTCTCTGAATCCATGACCGCCATGTCTTCATGAAGCTGTCCTTCCTCAAT +CCCCATCTGTTTTAAGGGTTCCACCCATGTTCTTCCTTAGCACCCTGAGTATTTACTCTA +T +>ENST99999999999 havana:known chromosome:GRCh38:11:105246880:105247060:-1 gene:ENSG99999999999 gene_biotype:unprocessed_pseudogene transcript_biotype:unprocessed_pseudogene +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA diff --git a/public/biopet-framework/src/test/resources/no_sample.tsv b/public/biopet-framework/src/test/resources/no_sample.tsv new file mode 100644 index 0000000000000000000000000000000000000000..81ccc2453208183e7fa8a8a2b0748f2a4d8f6716 --- /dev/null +++ b/public/biopet-framework/src/test/resources/no_sample.tsv @@ -0,0 +1,3 @@ +library bam +Lib_ID_1 MyFirst.bam +Lib_ID_2 MySecond.bam diff --git a/public/biopet-framework/src/test/resources/number.tsv b/public/biopet-framework/src/test/resources/number.tsv new file mode 100644 index 0000000000000000000000000000000000000000..0a76d53e6db7be2f331f440f72f48003d63ef49f --- /dev/null +++ b/public/biopet-framework/src/test/resources/number.tsv @@ -0,0 +1,3 @@ +sample library bam +1 5 MyFirst.bam +2 6 MySecond.bam diff --git a/public/biopet-framework/src/test/resources/paired01.pileup b/public/biopet-framework/src/test/resources/paired01.pileup new file mode 100644 index 0000000000000000000000000000000000000000..559c87b964c35b428bd913b32fd769873e24fd14 --- /dev/null +++ b/public/biopet-framework/src/test/resources/paired01.pileup @@ -0,0 +1,320 @@ +chrQ 50 N 1 ^]T E +chrQ 51 N 1 A E +chrQ 52 N 1 C F +chrQ 53 N 1 G F +chrQ 54 N 1 T G +chrQ 55 N 1 A G +chrQ 56 N 1 C H +chrQ 57 N 1 G H 
+chrQ 58 N 1 T I +chrQ 59 N 1 A$ I +chrQ 90 N 1 ^]a E +chrQ 91 N 1 t E +chrQ 92 N 1 g F +chrQ 93 N 1 c F +chrQ 94 N 1 a G +chrQ 95 N 1 t G +chrQ 96 N 1 g H +chrQ 97 N 1 c H +chrQ 98 N 1 a I +chrQ 99 N 1 t$ I +chrQ 150 N 1 ^]A G +chrQ 151 N 1 A G +chrQ 152 N 1 A G +chrQ 153 N 1 A G +chrQ 154 N 1 A G +chrQ 155 N 1 G G +chrQ 156 N 1 G G +chrQ 157 N 1 G G +chrQ 158 N 1 G G +chrQ 159 N 1 G$ G +chrQ 190 N 1 ^]g G +chrQ 191 N 1 g G +chrQ 192 N 1 g G +chrQ 193 N 1 g G +chrQ 194 N 1 g G +chrQ 195 N 1 a G +chrQ 196 N 1 a G +chrQ 197 N 1 a G +chrQ 198 N 1 a G +chrQ 199 N 1 a$ G +chrQ 250 N 1 ^]A G +chrQ 251 N 1 A G +chrQ 252 N 1 A G +chrQ 253 N 1 A G +chrQ 254 N 1 A G +chrQ 255 N 1 G G +chrQ 256 N 1 G G +chrQ 257 N 1 G G +chrQ 258 N 1 G G +chrQ 259 N 1 G$ G +chrQ 290 N 1 ^]g G +chrQ 291 N 1 g G +chrQ 292 N 1 g G +chrQ 293 N 1 g G +chrQ 294 N 1 g G +chrQ 295 N 1 a G +chrQ 296 N 1 a G +chrQ 297 N 1 a G +chrQ 298 N 1 a G +chrQ 299 N 1 a$ G +chrQ 450 N 1 ^]C E +chrQ 451 N 1 G E +chrQ 452 N 1 T F +chrQ 453 N 1 A F +chrQ 454 N 1 C G +chrQ 455 N 1 G G +chrQ 456 N 1 T H +chrQ 457 N 1 A H +chrQ 458 N 1 C I +chrQ 459 N 1 G$ I +chrQ 490 N 1 ^]g E +chrQ 491 N 1 c E +chrQ 492 N 1 a F +chrQ 493 N 1 t F +chrQ 494 N 1 g G +chrQ 495 N 1 c G +chrQ 496 N 1 a H +chrQ 497 N 1 t H +chrQ 498 N 1 g I +chrQ 499 N 1 c$ I +chrQ 650 N 1 ^]T H +chrQ 651 N 1 T H +chrQ 652 N 1 T H +chrQ 653 N 1 T H +chrQ 654 N 1 T H +chrQ 655 N 1 C H +chrQ 656 N 1 C H +chrQ 657 N 1 C H +chrQ 658 N 1 C H +chrQ 659 N 1 C$ H +chrQ 690 N 1 ^]c H +chrQ 691 N 1 c H +chrQ 692 N 1 c H +chrQ 693 N 1 c H +chrQ 694 N 1 c H +chrQ 695 N 1 t H +chrQ 696 N 1 t H +chrQ 697 N 1 t H +chrQ 698 N 1 t H +chrQ 699 N 1 t$ H +chrQ 890 N 1 ^]T E +chrQ 891 N 1 A E +chrQ 892 N 1 C F +chrQ 893 N 1 G F +chrQ 894 N 1 T G +chrQ 895 N 1 > G +chrQ 896 N 1 > G +chrQ 897 N 1 > G +chrQ 898 N 1 > G +chrQ 899 N 1 > G +chrQ 900 N 1 > G +chrQ 901 N 1 > G +chrQ 902 N 1 > G +chrQ 903 N 1 > G +chrQ 904 N 1 > G +chrQ 905 N 1 > G +chrQ 906 N 1 > G +chrQ 907 N 1 > G 
+chrQ 908 N 1 > G +chrQ 909 N 1 > G +chrQ 910 N 1 > G +chrQ 911 N 1 > G +chrQ 912 N 1 > G +chrQ 913 N 1 > G +chrQ 914 N 1 > G +chrQ 915 N 1 > G +chrQ 916 N 1 > G +chrQ 917 N 1 > G +chrQ 918 N 1 > G +chrQ 919 N 1 > G +chrQ 920 N 1 > G +chrQ 921 N 1 > G +chrQ 922 N 1 > G +chrQ 923 N 1 > G +chrQ 924 N 1 > G +chrQ 925 N 1 > G +chrQ 926 N 1 > G +chrQ 927 N 1 > G +chrQ 928 N 1 > G +chrQ 929 N 1 > G +chrQ 930 N 1 > G +chrQ 931 N 1 > G +chrQ 932 N 1 > G +chrQ 933 N 1 > G +chrQ 934 N 1 > G +chrQ 935 N 1 > G +chrQ 936 N 1 > G +chrQ 937 N 1 > G +chrQ 938 N 1 > G +chrQ 939 N 1 > G +chrQ 940 N 1 > G +chrQ 941 N 1 > G +chrQ 942 N 1 > G +chrQ 943 N 1 > G +chrQ 944 N 1 > G +chrQ 945 N 1 > G +chrQ 946 N 1 > G +chrQ 947 N 1 > G +chrQ 948 N 1 > G +chrQ 949 N 1 > G +chrQ 950 N 1 > G +chrQ 951 N 1 > G +chrQ 952 N 1 > G +chrQ 953 N 1 > G +chrQ 954 N 1 > G +chrQ 955 N 1 > G +chrQ 956 N 1 > G +chrQ 957 N 1 > G +chrQ 958 N 1 > G +chrQ 959 N 1 > G +chrQ 960 N 1 > G +chrQ 961 N 1 > G +chrQ 962 N 1 > G +chrQ 963 N 1 > G +chrQ 964 N 1 > G +chrQ 965 N 1 > G +chrQ 966 N 1 > G +chrQ 967 N 1 > G +chrQ 968 N 1 > G +chrQ 969 N 1 > G +chrQ 970 N 1 > G +chrQ 971 N 1 > G +chrQ 972 N 1 > G +chrQ 973 N 1 > G +chrQ 974 N 1 > G +chrQ 975 N 1 > G +chrQ 976 N 1 > G +chrQ 977 N 1 > G +chrQ 978 N 1 > G +chrQ 979 N 1 > G +chrQ 980 N 1 > G +chrQ 981 N 1 > G +chrQ 982 N 1 > G +chrQ 983 N 1 > G +chrQ 984 N 1 > G +chrQ 985 N 1 > G +chrQ 986 N 1 > G +chrQ 987 N 1 > G +chrQ 988 N 1 > G +chrQ 989 N 1 > G +chrQ 990 N 1 > G +chrQ 991 N 1 > G +chrQ 992 N 1 > G +chrQ 993 N 1 > G +chrQ 994 N 1 > G +chrQ 995 N 1 > G +chrQ 996 N 1 > G +chrQ 997 N 1 > G +chrQ 998 N 1 > G +chrQ 999 N 1 > G +chrQ 1000 N 1 > G +chrQ 1001 N 1 > G +chrQ 1002 N 1 > G +chrQ 1003 N 1 > G +chrQ 1004 N 1 > G +chrQ 1005 N 1 > G +chrQ 1006 N 1 > G +chrQ 1007 N 1 > G +chrQ 1008 N 1 > G +chrQ 1009 N 1 > G +chrQ 1010 N 1 > G +chrQ 1011 N 1 > G +chrQ 1012 N 1 > G +chrQ 1013 N 1 > G +chrQ 1014 N 1 > G +chrQ 1015 N 1 > G +chrQ 1016 N 1 > G +chrQ 1017 N 1 > G 
+chrQ 1018 N 1 > G +chrQ 1019 N 1 > G +chrQ 1020 N 1 > G +chrQ 1021 N 1 > G +chrQ 1022 N 1 > G +chrQ 1023 N 1 > G +chrQ 1024 N 1 > G +chrQ 1025 N 1 > G +chrQ 1026 N 1 > G +chrQ 1027 N 1 > G +chrQ 1028 N 1 > G +chrQ 1029 N 1 > G +chrQ 1030 N 1 > G +chrQ 1031 N 1 > G +chrQ 1032 N 1 > G +chrQ 1033 N 1 > G +chrQ 1034 N 1 > G +chrQ 1035 N 1 > G +chrQ 1036 N 1 > G +chrQ 1037 N 1 > G +chrQ 1038 N 1 > G +chrQ 1039 N 1 > G +chrQ 1040 N 1 > G +chrQ 1041 N 1 > G +chrQ 1042 N 1 > G +chrQ 1043 N 1 > G +chrQ 1044 N 1 > G +chrQ 1045 N 1 > G +chrQ 1046 N 1 > G +chrQ 1047 N 1 > G +chrQ 1048 N 1 > G +chrQ 1049 N 1 > G +chrQ 1050 N 1 > G +chrQ 1051 N 1 > G +chrQ 1052 N 1 > G +chrQ 1053 N 1 > G +chrQ 1054 N 1 > G +chrQ 1055 N 1 > G +chrQ 1056 N 1 > G +chrQ 1057 N 1 > G +chrQ 1058 N 1 > G +chrQ 1059 N 1 > G +chrQ 1060 N 1 > G +chrQ 1061 N 1 > G +chrQ 1062 N 1 > G +chrQ 1063 N 1 > G +chrQ 1064 N 1 > G +chrQ 1065 N 1 > G +chrQ 1066 N 1 > G +chrQ 1067 N 1 > G +chrQ 1068 N 1 > G +chrQ 1069 N 1 > G +chrQ 1070 N 1 > G +chrQ 1071 N 1 > G +chrQ 1072 N 1 > G +chrQ 1073 N 1 > G +chrQ 1074 N 1 > G +chrQ 1075 N 1 > G +chrQ 1076 N 1 > G +chrQ 1077 N 1 > G +chrQ 1078 N 1 > G +chrQ 1079 N 1 > G +chrQ 1080 N 1 > G +chrQ 1081 N 1 > G +chrQ 1082 N 1 > G +chrQ 1083 N 1 > G +chrQ 1084 N 1 > G +chrQ 1085 N 1 > G +chrQ 1086 N 1 > G +chrQ 1087 N 1 > G +chrQ 1088 N 1 > G +chrQ 1089 N 1 > G +chrQ 1090 N 1 > G +chrQ 1091 N 1 > G +chrQ 1092 N 1 > G +chrQ 1093 N 1 > G +chrQ 1094 N 1 > G +chrQ 1095 N 1 A G +chrQ 1096 N 1 C H +chrQ 1097 N 1 G H +chrQ 1098 N 1 T I +chrQ 1099 N 1 A$ I +chrQ 1140 N 1 ^]a E +chrQ 1141 N 1 t E +chrQ 1142 N 1 g F +chrQ 1143 N 1 c F +chrQ 1144 N 1 a G +chrQ 1145 N 1 t G +chrQ 1146 N 1 g H +chrQ 1147 N 1 c H +chrQ 1148 N 1 a I +chrQ 1149 N 1 t$ I diff --git a/public/biopet-framework/src/test/resources/sageAllGenesTest.tsv b/public/biopet-framework/src/test/resources/sageAllGenesTest.tsv new file mode 100644 index 
0000000000000000000000000000000000000000..602518753b0b9a24a18c6561fbb0b6aabd99a2fe --- /dev/null +++ b/public/biopet-framework/src/test/resources/sageAllGenesTest.tsv @@ -0,0 +1,3 @@ +ENSG00000255336 +ENSG00000254767 +ENSG99999999999 diff --git a/public/biopet-framework/src/test/resources/sageNoAntiTest.tsv b/public/biopet-framework/src/test/resources/sageNoAntiTest.tsv new file mode 100644 index 0000000000000000000000000000000000000000..84f1b39db543978adcf0f29ae27c96aacfe823f7 --- /dev/null +++ b/public/biopet-framework/src/test/resources/sageNoAntiTest.tsv @@ -0,0 +1 @@ +ENSG99999999999 diff --git a/public/biopet-framework/src/test/resources/sageNoTagsTest.tsv b/public/biopet-framework/src/test/resources/sageNoTagsTest.tsv new file mode 100644 index 0000000000000000000000000000000000000000..84f1b39db543978adcf0f29ae27c96aacfe823f7 --- /dev/null +++ b/public/biopet-framework/src/test/resources/sageNoTagsTest.tsv @@ -0,0 +1 @@ +ENSG99999999999 diff --git a/public/biopet-framework/src/test/resources/sageTest.tsv b/public/biopet-framework/src/test/resources/sageTest.tsv new file mode 100644 index 0000000000000000000000000000000000000000..080395ff9049459f7a43dfc131beb05367d89b7c --- /dev/null +++ b/public/biopet-framework/src/test/resources/sageTest.tsv @@ -0,0 +1,9 @@ +#tag firstTag AllTags FirstAntiTag AllAntiTags +CATGAAGACATGGCGGTCATG ENSG00000255336 +CATGAAGCAGAATATCAAGAG ENSG00000254767 +CATGACACTGATAGGCAAGCT ENSG00000254767 +CATGACCGCCATGTCTTCATG ENSG00000255336 +CATGGATTGCGCTCTACTGGT ENSG00000254767 ENSG00000254767 +CATGGGTGGAACCCTTAAAAC ENSG00000255336 ENSG00000255336 +CATGGTTCCCAGTGTGAGAAC ENSG00000254767 ENSG00000254767 +CATGTTCTTCCTTAGCACCCT ENSG00000255336 ENSG00000255336 diff --git a/public/biopet-framework/src/test/resources/same.tsv b/public/biopet-framework/src/test/resources/same.tsv new file mode 100644 index 0000000000000000000000000000000000000000..e82fcbb3b50a1c8a613480ad0b0ef76673a0060c --- /dev/null +++ 
b/public/biopet-framework/src/test/resources/same.tsv @@ -0,0 +1,3 @@ +sample library bam +Sample_ID_1 Lib_ID_1 MyFirst.bam +Sample_ID_1 Lib_ID_1 MySecond.bam diff --git a/public/biopet-framework/src/test/resources/sample.tsv b/public/biopet-framework/src/test/resources/sample.tsv new file mode 100644 index 0000000000000000000000000000000000000000..3c67fc7c1cbc58ad00d7869ed1275b9a85e96cf3 --- /dev/null +++ b/public/biopet-framework/src/test/resources/sample.tsv @@ -0,0 +1,3 @@ +sample library bam +Sample_ID_1 Lib_ID_1 MyFirst.bam +Sample_ID_2 Lib_ID_2 MySecond.bam diff --git a/public/biopet-framework/src/test/resources/tagCount.tsv b/public/biopet-framework/src/test/resources/tagCount.tsv new file mode 100644 index 0000000000000000000000000000000000000000..64181d09a20cdab0c5bfc4792dc081fe5fd0f222 --- /dev/null +++ b/public/biopet-framework/src/test/resources/tagCount.tsv @@ -0,0 +1,8 @@ +CATGAAGACATGGCGGTCATG 20 +CATGAAGCAGAATATCAAGAG 25 +CATGACACTGATAGGCAAGCT 30 +CATGACCGCCATGTCTTCATG 35 +CATGGATTGCGCTCTACTGGT 40 +CATGGGTGGAACCCTTAAAAC 45 +CATGGTTCCCAGTGTGAGAAC 50 +CATGTTCTTCCTTAGCACCCT 55 diff --git a/public/biopet-framework/src/test/resources/test.summary.json b/public/biopet-framework/src/test/resources/test.summary.json new file mode 100644 index 0000000000000000000000000000000000000000..aff9e962e4662f9f76f4a53f2a8fe0557ecb92d4 --- /dev/null +++ b/public/biopet-framework/src/test/resources/test.summary.json @@ -0,0 +1,17 @@ +{ + "samples" : { + "016" : { + "libraries" : { + "L001" : { + "flexiprep" : { + "settings" : { + "skip_trim" : false, + "skip_clip" : false, + "paired" : true + } + } + } + } + } + } +} diff --git a/public/biopet-framework/src/test/resources/unvep_online.vcf.gz b/public/biopet-framework/src/test/resources/unvep_online.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..f102295f99d0bc62e25de296b84dc6610930a683 Binary files /dev/null and b/public/biopet-framework/src/test/resources/unvep_online.vcf.gz differ diff 
--git a/public/biopet-framework/src/test/resources/unvep_online.vcf.gz.tbi b/public/biopet-framework/src/test/resources/unvep_online.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..bb43ff545f591dd276973e7158919e6d14c78f23 Binary files /dev/null and b/public/biopet-framework/src/test/resources/unvep_online.vcf.gz.tbi differ diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFastaTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFastaTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..ebe393bfab7bdb8fedec024319e99a5b0ec29a07 --- /dev/null +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFastaTest.scala @@ -0,0 +1,73 @@ +package nl.lumc.sasc.biopet.tools + +import java.io.File +import java.nio.file.Paths + +import htsjdk.variant.vcf.VCFFileReader +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test +import org.scalatest.mock.MockitoSugar +import org.mockito.Mockito._ + +/** + * Created by ahbbollen on 13-8-15. 
+ */ +class BastyGenerateFastaTest extends TestNGSuite with MockitoSugar with Matchers { + + import BastyGenerateFasta._ + private def resourcePath(p: String): String = { + Paths.get(getClass.getResource(p).toURI).toString + } + + val vepped_path = resourcePath("/VEP_oneline.vcf") + val vepped = new File(vepped_path) + val bam_path = resourcePath("/paired01.bam") + val chrQ_path = resourcePath("/chrQ.vcf.gz") + val chrQRef_path = resourcePath("/fake_chrQ.fa") + val bam = new File(resourcePath("/paired01.bam")) + val chrQ = new File(resourcePath("/chrQ.vcf.gz")) + val chrQRef = new File(resourcePath("/fake_chrQ.fa")) + + @Test def testMainVcf = { + val tmp = File.createTempFile("basty_out", ".fa") + val tmppath = tmp.getAbsolutePath + tmp.deleteOnExit() + + val arguments = Array("-V", chrQ_path, "--outputVariants", tmppath, "--sampleName", "Child_7006504", "--reference", chrQRef_path, "--outputName", "test") + main(arguments) + } + + @Test def testMainVcfAndBam = { + val tmp = File.createTempFile("basty_out", ".fa") + val tmppath = tmp.getAbsolutePath + tmp.deleteOnExit() + + val arguments = Array("-V", chrQ_path, "--outputVariants", tmppath, "--bamFile", bam_path, "--sampleName", "Child_7006504", "--reference", chrQRef_path, "--outputName", "test") + main(arguments) + } + + @Test def testMainVcfAndBamMore = { + val tmp = File.createTempFile("basty_out", ".fa") + val tmppath = tmp.getAbsolutePath + tmp.deleteOnExit() + + val arguments = Array("-V", chrQ_path, "--outputConsensus", tmppath, "--outputConsensusVariants", tmppath, "--bamFile", bam_path, "--sampleName", "Child_7006504", "--reference", chrQRef_path, "--outputName", "test") + main(arguments) + } + + @Test def testGetMaxAllele = { + val reader = new VCFFileReader(vepped, false) + val record = reader.iterator().next() + + val child = mock[Args] + when(child.sampleName) thenReturn "Child_7006504" + val father = mock[Args] + when(father.sampleName) thenReturn "Father_7006506" + + getMaxAllele(record)(child) 
shouldBe "C-" + getMaxAllele(record)(father) shouldBe "CA" + + } + +} diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstatTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstatTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..758cb6be6d6aa0a75c2ec1830e0fa3334beb7626 --- /dev/null +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstatTest.scala @@ -0,0 +1,62 @@ +package nl.lumc.sasc.biopet.tools + +import java.io.File +import java.nio.file.Paths + +import htsjdk.samtools.SamReaderFactory +import org.scalatest.Matchers +import org.scalatest.mock.MockitoSugar +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +import scala.io.Source + +/** + * Created by ahbbollen on 26-8-15. + */ +class BiopetFlagstatTest extends TestNGSuite with MockitoSugar with Matchers { + + import BiopetFlagstat._ + private def resourcePath(p: String): String = { + Paths.get(getClass.getResource(p).toURI).toString + } + + val bam = new File(resourcePath("/paired01.bam")) + val report = new File(resourcePath("/flagstat_report.txt")) + val summary = new File(resourcePath("/flagstat_summary.txt")) + val crossReport = new File(resourcePath("/flagstat_crossreport.txt")) + val crossTrue = new File(resourcePath("/flagstat_crosstrue.txt")) + + val record = SamReaderFactory.makeDefault().open(bam).iterator().next() + val processor = new FlagstatCollector + processor.loadDefaultFunctions() + processor.loadRecord(record) + + @Test + def testReport() = { + processor.report shouldBe Source.fromFile(report).mkString + } + + @Test + def testSummary() = { + processor.summary shouldBe Source.fromFile(summary).mkString + } + + @Test + def testCrossReport() = { + processor.crossReport() shouldBe Source.fromFile(crossReport).mkString + } + + @Test + def testCrossReportTrue() = { + processor.crossReport(true) shouldBe 
Source.fromFile(crossTrue).mkString + } + + @Test + def testMain() = { + //TODO: Test output file + val output = File.createTempFile("testMain", ".biopetflagstat") + main(Array("-I", bam.getAbsolutePath, "-o", output.toString)) + } + +} diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBamTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBamTest.scala index 6a99b92d3fd3b9ba16f9c09e67094cff1e40bedf..476e8e6230caa6bd1051a683459ca28566516699 100644 --- a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBamTest.scala +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBamTest.scala @@ -15,8 +15,11 @@ */ package nl.lumc.sasc.biopet.tools +import java.io.File import java.nio.file.Paths +import htsjdk.samtools.{ SamReaderFactory, SamReader } +import htsjdk.variant.vcf.VCFFileReader import org.scalatest.Matchers import org.scalatest.mock.MockitoSugar import org.scalatest.testng.TestNGSuite @@ -38,6 +41,7 @@ class CheckAllelesVcfInBamTest extends TestNGSuite with MockitoSugar with Matche val vcf = resourcePath("/chrQ.vcf") val bam = resourcePath("/single01.bam") + val vcf2 = new File(resourcePath("/chrQ2.vcf.gz")) val rand = new Random() @Test def testOutputTypeVcf() = { @@ -58,4 +62,19 @@ class CheckAllelesVcfInBamTest extends TestNGSuite with MockitoSugar with Matche main(arguments) } + @Test + def testCheckAllelesNone() = { + val variant = new File(vcf) + val samRecord = SamReaderFactory.makeDefault().open(new File(bam)).iterator().next() + val varRecord = new VCFFileReader(variant, false).iterator().next() + checkAlleles(samRecord, varRecord) shouldBe None + } + + @Test + def testCheckAlleles() = { + val samRecord = SamReaderFactory.makeDefault().open(new File(bam)).iterator().next() + val varRecord = new VCFFileReader(vcf2).iterator().next() + checkAlleles(samRecord, varRecord) shouldBe Some("T") + } + } diff 
--git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSplitterTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSplitterTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..ffa8be0bcb6c2cf15bf0ad00efeeb54e903eb3df --- /dev/null +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSplitterTest.scala @@ -0,0 +1,39 @@ +package nl.lumc.sasc.biopet.tools + +import java.io.File +import java.nio.file.Paths + +import org.scalatest.Matchers +import org.scalatest.mock.MockitoSugar +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +/** + * Created by ahbbollen on 27-8-15. + */ +class FastqSplitterTest extends TestNGSuite with MockitoSugar with Matchers { + + import FastqSplitter._ + private def resourcePath(p: String): String = { + Paths.get(getClass.getResource(p).toURI).toString + } + + val fq = resourcePath("/paired01a.fq") + + @Test + def testMain() = { + val temp = File.createTempFile("out", ".fastq") + + val args = Array("-I", fq, "-o", temp.getAbsolutePath) + main(args) + } + + @Test + def testManyOutMain() = { + val files = (0 until 10).map(_ => File.createTempFile("out", ".fastq")) + var args = Array("-I", fq) + files.foreach(x => args ++= Array("-o", x.getAbsolutePath)) + main(args) + } + +} diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBioTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBioTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..b6de2f8bcca896e8979c2e21fc5bb8651524610a --- /dev/null +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBioTest.scala @@ -0,0 +1,64 @@ +package nl.lumc.sasc.biopet.tools + +import java.io.File +import java.nio.file.Paths + +import htsjdk.samtools.{ SamReaderFactory, QueryInterval } +import nl.lumc.sasc.biopet.tools.FastqSplitter._ +import 
org.scalatest.Matchers +import org.scalatest.mock.MockitoSugar +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +import scala.collection.immutable.Nil + +/** + * Created by ahbbollen on 27-8-15. + */ +class FindRepeatsPacBioTest extends TestNGSuite with MockitoSugar with Matchers { + + import FindRepeatsPacBio._ + private def resourcePath(p: String): String = { + Paths.get(getClass.getResource(p).toURI).toString + } + + val bed = resourcePath("/rrna01.bed") + val bam = resourcePath("/paired01.bam") + + @Test + def testMain() = { + + val outputFile = File.createTempFile("repeats", ".tsv") + val args = Array("-I", bam, "-b", bed, "-o", outputFile.toString) + main(args) + } + + @Test + def testResult() = { + val samReader = SamReaderFactory.makeDefault().open(new File(bam)) + val header = samReader.getFileHeader + val record = samReader.iterator().next() + val interval = new QueryInterval(header.getSequenceIndex("chrQ"), 50, 55) + val result = procesSamrecord(record, interval) + + result.isEmpty shouldBe false + + result.get.samRecord shouldEqual record + result.get.beginDel should be >= 0 + result.get.endDel should be >= 0 + } + + @Test + def testResultObject = { + val record = SamReaderFactory.makeDefault().open(new File(bam)).iterator().next() + val result = new Result + result.samRecord = record + + result.samRecord shouldEqual record + result.beginDel shouldBe 0 + result.endDel shouldBe 0 + result.dels shouldEqual Nil + result.ins shouldEqual Nil + } + +} diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/MpileupToVcfTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/MpileupToVcfTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..032f9a913373c0f3be617e45cc26a928ca69821d --- /dev/null +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/MpileupToVcfTest.scala @@ -0,0 +1,78 @@ +package nl.lumc.sasc.biopet.tools + +import java.io.File 
+import java.nio.file.Paths + +import htsjdk.samtools.reference.IndexedFastaSequenceFile +import htsjdk.variant.variantcontext.Allele +import htsjdk.variant.vcf.VCFFileReader +import org.scalatest.Matchers +import org.scalatest.mock.MockitoSugar +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +import scala.collection.JavaConversions._ + +/** + * Created by ahbbollen on 27-8-15. + */ +class MpileupToVcfTest extends TestNGSuite with MockitoSugar with Matchers { + + import MpileupToVcf._ + private def resourcePath(p: String): String = { + Paths.get(getClass.getResource(p).toURI).toString + } + + val pileup = resourcePath("/paired01.pileup") + + @Test + def testMain() = { + val tmp = File.createTempFile("mpileup", ".vcf") + val args = Array("-I", pileup, "--sample", "test", "-o", tmp.getAbsolutePath) + + main(args) + } + + @Test + def validateOutVcf() = { + val tmp = File.createTempFile("mpileup", ".vcf") + val args = Array("-I", pileup, "--sample", "test", "-o", tmp.getAbsolutePath, "--minDP", "1", "--minAP", "1") + main(args) + + val vcfReader = new VCFFileReader(tmp, false) + + // VariantContexts validate on creation + // therefore we just have to loop through them + + vcfReader.foreach(_ => 1) + + } + + @Test + def extraValidateOutVcf() = { + val tmp = File.createTempFile("mpileup", ".vcf") + val args = Array("-I", pileup, "--sample", "test", "-o", tmp.getAbsolutePath, "--minDP", "1", "--minAP", "1") + main(args) + + val vcfReader = new VCFFileReader(tmp, false) + + val fasta = resourcePath("/chrQ_allN.fa") + + val sequenceFile = new IndexedFastaSequenceFile(new File(fasta)) + val sequenceDict = sequenceFile.getSequenceDictionary + + for (record <- vcfReader) { + val alleles = record.getAlleles.toSet + var ref_alleles = alleles -- record.getAlternateAlleles.toSet + + ref_alleles.size should be >= 1 + + val realRef = Allele.create(sequenceFile.getSubsequenceAt(record.getContig, + record.getStart, record.getEnd).getBases, true) + + for 
(ref <- ref_alleles) { + record.extraStrictValidation(ref, realRef, Set("")) + } + } + } +} diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/PrefixFastqTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/PrefixFastqTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..41db9e3ae268a62860fd3fbe9a27e4430165429e --- /dev/null +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/PrefixFastqTest.scala @@ -0,0 +1,47 @@ +package nl.lumc.sasc.biopet.tools + +import java.io.File +import java.nio.file.Paths + +import htsjdk.samtools.fastq.FastqReader +import org.scalatest.Matchers +import org.scalatest.mock.MockitoSugar +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +import scala.collection.JavaConversions._ + +/** + * Created by ahbbollen on 28-8-15. + */ +class PrefixFastqTest extends TestNGSuite with MockitoSugar with Matchers { + + import PrefixFastq._ + private def resourcePath(p: String): String = { + Paths.get(getClass.getResource(p).toURI).toString + } + + val fq = resourcePath("/paired01a.fq") + + @Test + def testMain() = { + val temp = File.createTempFile("out", ".fastq") + + val args = Array("-i", fq, "-o", temp.getAbsolutePath, "-s", "AAA") + main(args) + } + + @Test + def testOutput() = { + val temp = File.createTempFile("out", ".fastq") + + val args = Array("-i", fq, "-o", temp.getAbsolutePath, "-s", "AAA") + main(args) + + val reader = new FastqReader(temp) + + for (read <- reader.iterator()) { + read.getReadString.startsWith("AAA") shouldBe true + } + } +} diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/SageCountFastqTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/SageCountFastqTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..15d3074c95313c6f2a084c41b4d0a4787bdedddb --- /dev/null +++ 
b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/SageCountFastqTest.scala @@ -0,0 +1,30 @@ +package nl.lumc.sasc.biopet.tools + +import java.io.File +import java.nio.file.Paths + +import org.scalatest.Matchers +import org.scalatest.mock.MockitoSugar +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +/** + * Created by ahbbollen on 28-8-15. + */ +class SageCountFastqTest extends TestNGSuite with MockitoSugar with Matchers { + import SageCountFastq._ + private def resourcePath(p: String): String = { + Paths.get(getClass.getResource(p).toURI).toString + } + + val fq = resourcePath("/paired01a.fq") + + @Test + def testMain() = { + val temp = File.createTempFile("out", ".fastq") + + val args = Array("-I", fq, "-o", temp.getAbsolutePath) + main(args) + } + +} diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateLibaryTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateLibaryTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..48038ea6f476fd89a945f53cf3d3608661fbcc9c --- /dev/null +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateLibaryTest.scala @@ -0,0 +1,112 @@ +package nl.lumc.sasc.biopet.tools + +import java.io.File +import java.nio.file.Paths + +import org.biojava3.core.sequence.DNASequence +import org.biojava3.core.sequence.io.FastaReaderHelper +import org.scalatest.Matchers +import org.scalatest.mock.MockitoSugar +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +import scala.collection.JavaConversions._ + +import scala.io.Source + +/** + * Created by ahbbollen on 7-9-15. 
+ */ +class SageCreateLibaryTest extends TestNGSuite with MockitoSugar with Matchers { + + import SageCreateLibrary._ + private def resourcePath(p: String): String = { + Paths.get(getClass.getResource(p).toURI).toString + } + + @Test + def testMain = { + + val input = resourcePath("/mini.transcriptome.fa") + val output = File.createTempFile("sageCreateLibrary", ".tsv") + val noTagsOutput = File.createTempFile("sageCreateLibrary", ".tsv") + val antiTagsOutput = File.createTempFile("sageCreateLibrary", ".tsv") + val allGenesOutput = File.createTempFile("sageCreateLibrary", ".tsv") + + val args = Array("-I", input, "-o", output.getAbsolutePath, "--tag", "CATG", + "--length", "17", "--noTagsOutput", noTagsOutput.getAbsolutePath, "--noAntiTagsOutput", + antiTagsOutput.getAbsolutePath, "--allGenesOutput", allGenesOutput.getAbsolutePath) + + noException should be thrownBy main(args) + + val args2 = Array("-I", input, "-o", output.getAbsolutePath, "--tag", "CATG", + "--length", "17") + noException should be thrownBy main(args2) + val args3 = Array("-I", input, "-o", output.getAbsolutePath, "--tag", "CATG", + "--length", "17", "--noTagsOutput", noTagsOutput.getAbsolutePath) + noException should be thrownBy main(args3) + + } + + @Test + def testOutPut = { + val input = resourcePath("/mini.transcriptome.fa") + val output = File.createTempFile("sageCreateLibrary", ".tsv") + val noTagsOutput = File.createTempFile("sageCreateLibrary", ".tsv") + val antiTagsOutput = File.createTempFile("sageCreateLibrary", ".tsv") + val allGenesOutput = File.createTempFile("sageCreateLibrary", ".tsv") + + val args = Array("-I", input, "-o", output.getAbsolutePath, "--tag", "CATG", + "--length", "17", "--noTagsOutput", noTagsOutput.getAbsolutePath, "--noAntiTagsOutput", + antiTagsOutput.getAbsolutePath, "--allGenesOutput", allGenesOutput.getAbsolutePath) + main(args) + + Source.fromFile(output).mkString should equal( + Source.fromFile(new File(resourcePath("/sageTest.tsv"))).mkString + ) + + 
Source.fromFile(noTagsOutput).mkString should equal( + Source.fromFile(new File(resourcePath("/sageNoTagsTest.tsv"))).mkString + ) + + Source.fromFile(antiTagsOutput).mkString should equal( + Source.fromFile(new File(resourcePath("/sageNoAntiTest.tsv"))).mkString + ) + + Source.fromFile(allGenesOutput).mkString should equal( + Source.fromFile(new File(resourcePath("/sageAllGenesTest.tsv"))).mkString + ) + } + + @Test + def testGetTags = { + val input = resourcePath("/mini.transcriptome.fa") + + val reader = FastaReaderHelper.readFastaDNASequence(new File(input)) + + val records = reader.iterator.toList + val tagRegex = ("CATG" + "[CATG]{" + 17 + "}").r + + val record1 = records(0) + val record2 = records(1) + val record3 = records(2) + + val result1 = getTags(record1._1, record1._2, tagRegex) + val result2 = getTags(record2._1, record2._2, tagRegex) + val result3 = getTags(record3._1, record3._2, tagRegex) + + result1.allTags.size shouldBe 2 + result1.allAntiTags.size shouldBe 2 + result1.firstTag shouldBe "CATGGATTGCGCTCTACTGGT" + result1.firstAntiTag shouldBe "CATGGTTCCCAGTGTGAGAAC" + + result2.allTags.size shouldBe 2 + result2.allAntiTags.size shouldBe 2 + result2.firstTag shouldBe "CATGTTCTTCCTTAGCACCCT" + result2.firstAntiTag shouldBe "CATGGGTGGAACCCTTAAAAC" + + result3.allTags.size shouldBe 0 + result3.allAntiTags.size shouldBe 0 + } + +} diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCountsTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCountsTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..b47c507856ec251878057a3823d47d57160045e9 --- /dev/null +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCountsTest.scala @@ -0,0 +1,67 @@ +package nl.lumc.sasc.biopet.tools + +import java.io.File +import java.nio.file.Paths + +import org.scalatest.Matchers +import org.scalatest.mock.MockitoSugar +import 
org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +import scala.io.Source + +/** + * Created by ahbbollen on 7-9-15. + */ +class SageCreateTagCountsTest extends TestNGSuite with MockitoSugar with Matchers { + + import SageCreateTagCounts._ + private def resourcePath(p: String): String = { + Paths.get(getClass.getResource(p).toURI).toString + } + + @Test + def testMain = { + val input = resourcePath("/tagCount.tsv") + val tagLib = resourcePath("/sageTest.tsv") + + val sense = File.createTempFile("SageCreateTagCountsTEst", ".tsv") + val allSense = File.createTempFile("SageCreateTagCountsTEst", ".tsv") + val antiSense = File.createTempFile("SageCreateTagCountsTEst", ".tsv") + val allAntiSense = File.createTempFile("SageCreateTagCountsTEst", ".tsv") + + noException should be thrownBy main(Array("-I", input, "--tagLib", tagLib, + "--countSense", sense.getAbsolutePath, "--countAllSense", allSense.getAbsolutePath, + "--countAntiSense", antiSense.getAbsolutePath, "--countAllAntiSense", allAntiSense.getAbsolutePath)) + noException should be thrownBy main(Array("-I", input, "--tagLib", tagLib, + "--countSense", sense.getAbsolutePath, "--countAllSense", allSense.getAbsolutePath, + "--countAntiSense", antiSense.getAbsolutePath)) + noException should be thrownBy main(Array("-I", input, "--tagLib", tagLib, + "--countSense", sense.getAbsolutePath, "--countAllSense", allSense.getAbsolutePath)) + noException should be thrownBy main(Array("-I", input, "--tagLib", tagLib, + "--countSense", sense.getAbsolutePath)) + noException should be thrownBy main(Array("-I", input, "--tagLib", tagLib)) + + } + + @Test + def testOutput = { + val input = resourcePath("/tagCount.tsv") + val tagLib = resourcePath("/sageTest.tsv") + + val sense = File.createTempFile("SageCreateTagCountsTEst", ".tsv") + val allSense = File.createTempFile("SageCreateTagCountsTEst", ".tsv") + val antiSense = File.createTempFile("SageCreateTagCountsTEst", ".tsv") + val allAntiSense = 
File.createTempFile("SageCreateTagCountsTEst", ".tsv") + + main(Array("-I", input, "--tagLib", tagLib, "--countSense", sense.getAbsolutePath, + "--countAllSense", allSense.getAbsolutePath, "--countAntiSense", antiSense.getAbsolutePath, + "--countAllAntiSense", allAntiSense.getAbsolutePath)) + + Source.fromFile(sense).mkString should equal("ENSG00000254767\t40\nENSG00000255336\t55\n") + Source.fromFile(allSense).mkString should equal("ENSG00000254767\t70\nENSG00000255336\t90\n") + Source.fromFile(antiSense).mkString should equal("ENSG00000254767\t50\nENSG00000255336\t45\n") + Source.fromFile(allAntiSense).mkString should equal("ENSG00000254767\t75\nENSG00000255336\t65\n") + } + +} diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJsonTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJsonTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..9866ad47a8efbfad5f34868b82fe415f6e6b8c28 --- /dev/null +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJsonTest.scala @@ -0,0 +1,79 @@ +package nl.lumc.sasc.biopet.tools + +import java.io.File +import java.nio.file.Paths + +import org.scalatest.Matchers +import org.scalatest.mock.MockitoSugar +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +/** + * Created by ahbbollen on 28-8-15. 
+ */ +class SamplesTsvToJsonTest extends TestNGSuite with MockitoSugar with Matchers { + import SamplesTsvToJson._ + private def resourcePath(p: String): String = { + Paths.get(getClass.getResource(p).toURI).toString + } + + @Test + def testCorrectSampleTsv = { + val tsv = resourcePath("/sample.tsv") + val output = File.createTempFile("testCorrectSampleTsv", ".json") + + noException should be thrownBy main(Array("-i", tsv, "-o", output.toString)) + } + + @Test + def testNoSampleColumn() = { + val tsv = resourcePath("/no_sample.tsv") + val output = File.createTempFile("testNoSampleColumn", ".json") + val thrown = the[IllegalStateException] thrownBy main(Array("-i", tsv, "-o", output.toString)) + thrown.getMessage should equal("Sample column does not exist in: " + tsv) + } + + @Test + def testNumberInLibs = { + val tsv = resourcePath("/number.tsv") + val output = File.createTempFile("testNumberInLibs", ".json") + val thrown = the[IllegalStateException] thrownBy main(Array("-i", tsv, "-o", output.toString)) + thrown.getMessage should equal("Sample or library may not start with a number") + } + + @Test + def testSampleIDs = { + val tsv = resourcePath("/same.tsv") + val output = File.createTempFile("testSampleIDs", ".json") + val thrown = the[IllegalStateException] thrownBy main(Array("-i", tsv, "-o", output.toString)) + thrown.getMessage should equal("Combination of Sample_ID_1 and Lib_ID_1 is found multiple times") + + } + + @Test + def testJson = { + val tsv = new File(resourcePath("/sample.tsv")) + val json = stringFromInputs(List(tsv)) + + json should equal( + """|{ + | "samples" : { + | "Sample_ID_1" : { + | "libraries" : { + | "Lib_ID_1" : { + | "bam" : "MyFirst.bam" + | } + | } + | }, + | "Sample_ID_2" : { + | "libraries" : { + | "Lib_ID_2" : { + | "bam" : "MySecond.bam" + | } + | } + | } + | } + |}""".stripMargin) + } + +} diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/SeqStatTest.scala 
b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/SeqStatTest.scala index d9180aec45025b68e9830227110ca9320571b683..c9dd5e290c3cd4da97c58198da2a596258ed0c64 100644 --- a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/SeqStatTest.scala +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/SeqStatTest.scala @@ -101,4 +101,6 @@ class SeqStatTest extends TestNGSuite with MockitoSugar with Matchers { val parsed = parseArgs(args) parsed.fastq shouldBe resourceFile("/paired01a.fq") } + + // TODO: Shared state here. Calling main changes the state, which causes other tests to fail } \ No newline at end of file diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/SummaryToTsvTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/SummaryToTsvTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..916fd48cc35991c0a6a47be4f646b179d8f66831 --- /dev/null +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/SummaryToTsvTest.scala @@ -0,0 +1,73 @@ +package nl.lumc.sasc.biopet.tools + +import java.io.File +import java.nio.file.Paths + +import nl.lumc.sasc.biopet.tools.SamplesTsvToJson._ +import org.scalatest.Matchers +import org.scalatest.mock.MockitoSugar +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +import nl.lumc.sasc.biopet.core.summary.Summary + +/** + * Created by ahbbollen on 31-8-15. 
+ */ +class SummaryToTsvTest extends TestNGSuite with MockitoSugar with Matchers { + import SummaryToTsv._ + private def resourcePath(p: String): String = { + Paths.get(getClass.getResource(p).toURI).toString + } + + @Test + def testMain = { + val tsv = resourcePath("/test.summary.json") + val output = File.createTempFile("main", "tsv") + + noException should be thrownBy main(Array("-s", tsv, "-p", "something=flexiprep:settings:skip_trim", + "-m", "root", "-o", output.toString)) + noException should be thrownBy main(Array("-s", tsv, "-p", "something=flexiprep:settings:skip_trim", + "-m", "sample", "-o", output.toString)) + noException should be thrownBy main(Array("-s", tsv, "-p", "something=flexiprep:settings:skip_trim", + "-m", "lib", "-o", output.toString)) + } + + @Test + def testHeader = { + val tsv = resourcePath("/test.summary.json") + val path = List("something=flexiprep:settings:skip_trim") + + val paths = path.map(x => { + val split = x.split("=", 2) + split(0) -> split(1).split(":") + }).toMap + + createHeader(paths) should equal("\tsomething") + } + + @Test + def testLine = { + val tsv = resourcePath("/test.summary.json") + val path = List("something=flexiprep:settings:skip_trim") + + val paths = path.map(x => { + val split = x.split("=", 2) + split(0) -> split(1).split(":") + }).toMap + + val summary = new Summary(new File(tsv)) + val values = fetchValues(summary, paths) + + val line = values.head._2.keys.map(x => createLine(paths, values, x)).head + line should equal("value\t") + val sample_values = fetchValues(summary, paths, true, false) + val sample_line = sample_values.head._2.keys.map(x => createLine(paths, sample_values, x)).head + sample_line should equal("016\t") + + val lib_values = fetchValues(summary, paths, false, true) + val lib_line = lib_values.head._2.keys.map(x => createLine(paths, lib_values, x)).head + lib_line should equal("016-L001\tfalse") + } + +} diff --git 
a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala index 2b47405a46b9971aab59ff2c4eaa088efea7f052..80fe1980eccad9932e0472ae28242ae93e6b6420 100644 --- a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala @@ -81,4 +81,129 @@ class VcfFilterTest extends TestNGSuite with MockitoSugar with Matchers { hasGenotype(record, List(("Mother_7006508", GenotypeType.HET), ("Child_7006504", GenotypeType.HOM_REF))) shouldBe false } + @Test def testMinQualScore() = { + val reader = new VCFFileReader(vepped, false) + val record = reader.iterator().next() + + minQualscore(record, 2000) shouldBe false + minQualscore(record, 1000) shouldBe true + + } + + @Test def testHasNonRefCalls() = { + val reader = new VCFFileReader(vepped, false) + val record = reader.iterator().next() + + hasNonRefCalls(record) shouldBe true + } + + @Test def testHasCalls() = { + val reader = new VCFFileReader(vepped, false) + val record = reader.iterator().next() + + hasCalls(record) shouldBe true + } + + @Test def testHasMinDP() = { + val reader = new VCFFileReader(vepped, false) + val record = reader.iterator().next() + + hasMinTotalDepth(record, 100) shouldBe true + hasMinTotalDepth(record, 200) shouldBe false + } + + @Test def testHasMinSampleDP() = { + val reader = new VCFFileReader(vepped, false) + val record = reader.iterator().next() + + hasMinSampleDepth(record, 30, 1) shouldBe true + hasMinSampleDepth(record, 30, 2) shouldBe true + hasMinSampleDepth(record, 30, 3) shouldBe true + hasMinSampleDepth(record, 40, 1) shouldBe true + hasMinSampleDepth(record, 40, 2) shouldBe true + hasMinSampleDepth(record, 40, 3) shouldBe false + hasMinSampleDepth(record, 50, 1) shouldBe false + hasMinSampleDepth(record, 50, 2) shouldBe false + hasMinSampleDepth(record, 50, 3) 
shouldBe false + } + + @Test def testHasMinSampleAD() = { + val reader = new VCFFileReader(vepped, false) + val record = reader.iterator().next() + + minAlternateDepth(record, 0, 3) shouldBe true + minAlternateDepth(record, 10, 2) shouldBe true + minAlternateDepth(record, 10, 3) shouldBe false + minAlternateDepth(record, 20, 1) shouldBe true + minAlternateDepth(record, 20, 2) shouldBe false + } + + @Test def testMustHaveVariant() = { + val reader = new VCFFileReader(vepped, false) + val record = reader.iterator().next() + + mustHaveVariant(record, List("Child_7006504")) shouldBe true + mustHaveVariant(record, List("Child_7006504", "Father_7006506")) shouldBe true + mustHaveVariant(record, List("Child_7006504", "Father_7006506", "Mother_7006508")) shouldBe false + } + + @Test def testSameGenotype() = { + val reader = new VCFFileReader(vepped, false) + val record = reader.iterator().next() + + notSameGenotype(record, "Child_7006504", "Father_7006506") shouldBe false + notSameGenotype(record, "Child_7006504", "Mother_7006508") shouldBe true + notSameGenotype(record, "Father_7006506", "Mother_7006508") shouldBe true + } + + @Test def testfilterHetVarToHomVar() = { + val reader = new VCFFileReader(vepped, false) + val record = reader.iterator().next() + + filterHetVarToHomVar(record, "Child_7006504", "Father_7006506") shouldBe true + filterHetVarToHomVar(record, "Child_7006504", "Mother_7006508") shouldBe true + filterHetVarToHomVar(record, "Father_7006506", "Mother_7006508") shouldBe true + } + + @Test def testDeNovo() = { + val reader = new VCFFileReader(vepped, false) + val record = reader.iterator().next() + + denovoInSample(record, "Child_7006504") shouldBe false + denovoInSample(record, "Father_7006506") shouldBe false + denovoInSample(record, "Mother_7006508") shouldBe false + } + + @Test def testResToDom() = { + val reader = new VCFFileReader(vepped, false) + val record = reader.iterator().next() + val trio = new Trio("Child_7006504", "Father_7006506", 
"Mother_7006508") + + resToDom(record, List(trio)) shouldBe false + } + + @Test def testTrioCompound = { + val reader = new VCFFileReader(vepped, false) + val record = reader.iterator().next() + val trio = new Trio("Child_7006504", "Father_7006506", "Mother_7006508") + + trioCompound(record, List(trio)) + } + + @Test def testDeNovoTrio = { + val reader = new VCFFileReader(vepped, false) + val record = reader.iterator().next() + val trio = new Trio("Child_7006504", "Father_7006506", "Mother_7006508") + + denovoTrio(record, List(trio)) + } + + @Test def testInIDSet() = { + val reader = new VCFFileReader(vepped, false) + val record = reader.iterator().next() + + inIdSet(record, Set("rs199537431")) shouldBe true + inIdSet(record, Set("dummy")) shouldBe false + } + } diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VcfStatsTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VcfStatsTest.scala index 0ffe4713b117f8797be7b075e5a948bb0f709722..b7a30c52e615c18502646d23b325637e288e8e72 100644 --- a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VcfStatsTest.scala +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VcfStatsTest.scala @@ -15,7 +15,11 @@ */ package nl.lumc.sasc.biopet.tools +import java.io.File +import java.nio.file.{ Files, Paths } + import htsjdk.variant.variantcontext.Allele +import htsjdk.variant.vcf.VCFFileReader import nl.lumc.sasc.biopet.tools.VcfStats._ import org.scalatest.Matchers import org.scalatest.testng.TestNGSuite @@ -29,6 +33,9 @@ import scala.collection.mutable * Created by pjvan_thof on 2/5/15. 
*/ class VcfStatsTest extends TestNGSuite with Matchers { + private def resourcePath(p: String): String = { + Paths.get(getClass.getResource(p).toURI).toString + } @Test def testSampleToSampleStats(): Unit = { @@ -116,4 +123,292 @@ class VcfStatsTest extends TestNGSuite with Matchers { alleleOverlap(List(a1, a1), List(a2, a2)) shouldBe 0 alleleOverlap(List(a2, a2), List(a1, a1)) shouldBe 0 } + + @Test + def testMergeStatsMap = { + val m1: mutable.Map[Any, Int] = mutable.Map("a" -> 1) + val m2: mutable.Map[Any, Int] = mutable.Map("b" -> 2) + + mergeStatsMap(m1, m2) + + m1 should equal(mutable.Map("a" -> 1, "b" -> 2)) + + val m3: mutable.Map[Any, Int] = mutable.Map(1 -> 500) + val m4: mutable.Map[Any, Int] = mutable.Map(6 -> 125) + + mergeStatsMap(m3, m4) + + m3 should equal(mutable.Map(1 -> 500, 6 -> 125)) + + mergeStatsMap(m1, m3) + + m1 should equal(mutable.Map("a" -> 1, "b" -> 2, 1 -> 500, 6 -> 125)) + } + + @Test + def testMergeNestedStatsMap = { + val m1: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]] = mutable.Map("test" -> + mutable.Map("nested" -> mutable.Map("a" -> 1))) + val m2: Map[String, Map[String, Map[Any, Int]]] = Map("test" -> + Map("nested" -> Map("b" -> 2))) + + mergeNestedStatsMap(m1, m2) + + m1 should equal(mutable.Map("test" -> mutable.Map("nested" -> mutable.Map("a" -> 1, "b" -> 2)))) + + val m3: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]] = mutable.Map("test" -> + mutable.Map("nestedd" -> mutable.Map(1 -> 500))) + val m4: Map[String, Map[String, Map[Any, Int]]] = Map("test" -> + Map("nestedd" -> Map(6 -> 125))) + + mergeNestedStatsMap(m3, m4) + + m3 should equal(mutable.Map("test" -> mutable.Map("nestedd" -> mutable.Map(1 -> 500, 6 -> 125)))) + + val m5 = m3.toMap.map(x => x._1 -> x._2.toMap.map(y => y._1 -> y._2.toMap)) + + mergeNestedStatsMap(m1, m5) + + m1 should equal(mutable.Map("test" -> mutable.Map("nested" -> mutable.Map("a" -> 1, "b" -> 2), + "nestedd" -> mutable.Map(1 -> 500, 6 -> 125)))) + } + + 
@Test + def testValueOfTsv = { + val i = new File(resourcePath("/sample.tsv")) + + valueFromTsv(i, "Sample_ID_1", "library") should be(Some("Lib_ID_1")) + valueFromTsv(i, "Sample_ID_2", "library") should be(Some("Lib_ID_2")) + valueFromTsv(i, "Sample_ID_1", "bam") should be(Some("MyFirst.bam")) + valueFromTsv(i, "Sample_ID_2", "bam") should be(Some("MySecond.bam")) + valueFromTsv(i, "Sample_ID_3", "bam") should be(empty) + } + + @Test + def testMain = { + val tmp = Files.createTempDirectory("vcfStats") + val vcf = resourcePath("/chrQ.vcf.gz") + val ref = resourcePath("/fake_chrQ.fa") + + noException should be thrownBy main(Array("-I", vcf, "-R", ref, "-o", tmp.toAbsolutePath.toString)) + noException should be thrownBy main(Array("-I", vcf, "-R", ref, "-o", tmp.toAbsolutePath.toString, "--allInfoTags")) + noException should be thrownBy main(Array("-I", vcf, "-R", ref, "-o", + tmp.toAbsolutePath.toString, "--allInfoTags", "--allGenotypeTags")) + noException should be thrownBy main(Array("-I", vcf, "-R", ref, "-o", + tmp.toAbsolutePath.toString, "--binSize", "50", "--writeBinStats")) + noException should be thrownBy main(Array("-I", vcf, "-R", ref, "-o", + tmp.toAbsolutePath.toString, "--binSize", "50", "--writeBinStats", + "--generalWiggle", "Total")) + noException should be thrownBy main(Array("-I", vcf, "-R", ref, "-o", + tmp.toAbsolutePath.toString, "--binSize", "50", "--writeBinStats", + "--genotypeWiggle", "Total")) + + val genotypes = List("Het", "HetNonRef", "Hom", "HomRef", "HomVar", "Mixed", "NoCall", "NonInformative", + "Available", "Called", "Filtered", "Variant") + + genotypes.foreach( + x => noException should be thrownBy main(Array("-I", vcf, "-R", ref, "-o", + tmp.toAbsolutePath.toString, "--binSize", "50", "--writeBinStats", + "--genotypeWiggle", x)) + ) + + val general = List("Biallelic", "ComplexIndel", "Filtered", "FullyDecoded", "Indel", "Mixed", + "MNP", "MonomorphicInSamples", "NotFiltered", "PointEvent", "PolymorphicInSamples", + 
"SimpleDeletion", "SimpleInsertion", "SNP", "StructuralIndel", "Symbolic", + "SymbolicOrSV", "Variant") + + general.foreach( + x => noException should be thrownBy main(Array("-I", vcf, "-R", ref, "-o", + tmp.toAbsolutePath.toString, "--binSize", "50", "--writeBinStats", + "--generalWiggle", x)) + ) + + // returns None when validation fails + def validateArgs(array: Array[String]): Option[Args] = { + val argsParser = new OptParser + argsParser.parse(array, Args()) + } + + val stderr1 = new java.io.ByteArrayOutputStream + Console.withErr(stderr1) { + validateArgs(Array("-I", vcf, "-R", ref, "-o", + tmp.toAbsolutePath.toString, "--binSize", "50", "--writeBinStats", + "--genotypeWiggle", "NonexistentThing")) shouldBe empty + } + + val stderr2 = new java.io.ByteArrayOutputStream + Console.withErr(stderr2) { + validateArgs(Array("-I", vcf, "-R", ref, "-o", + tmp.toAbsolutePath.toString, "--binSize", "50", "--writeBinStats", + "--generalWiggle", "NonexistentThing")) shouldBe empty + } + + val stderr3 = new java.io.ByteArrayOutputStream + Console.withErr(stderr3) { + validateArgs(Array("-R", ref, "-o", + tmp.toAbsolutePath.toString)) shouldBe empty + } + } + + @Test + def testSortAnyAny = { + //stub + val one: Any = 1 + val two: Any = 2 + val text: Any = "hello" + val text2: Any = "goodbye" + + sortAnyAny(one, two) shouldBe true + sortAnyAny(two, one) shouldBe false + sortAnyAny(text, text2) shouldBe false + sortAnyAny(text2, text) shouldBe true + sortAnyAny(one, text) shouldBe true + sortAnyAny(text, one) shouldBe false + } + + @Test + def testCheckGeneral = { + val record = new VCFFileReader(new File(resourcePath("/chrQ.vcf.gz"))).iterator().next() + + val blah = checkGeneral(record, List()) + + blah.get("chrQ") should not be empty + blah.get("total") should not be empty + + val chrq = blah.get("chrQ").get + chrq.get("SampleDistribution-NonInformative") shouldEqual Some(Map(0 -> 1)) + chrq.get("SampleDistribution-Called") shouldEqual Some(Map(3 -> 1)) + 
chrq.get("SampleDistribution-Mixed") shouldEqual Some(Map(0 -> 1)) + chrq.get("SampleDistribution-Hom") shouldEqual Some(Map(1 -> 1)) + chrq.get("SampleDistribution-HomRef") shouldEqual Some(Map(1 -> 1)) + chrq.get("SampleDistribution-Available") shouldEqual Some(Map(3 -> 1)) + chrq.get("QUAL") shouldEqual Some(Map(1541 -> 1)) + chrq.get("SampleDistribution-HetNonRef") shouldEqual Some(Map(0 -> 1)) + chrq.get("SampleDistribution-Het") shouldEqual Some(Map(2 -> 1)) + chrq.get("SampleDistribution-NoCall") shouldEqual Some(Map(0 -> 1)) + chrq.get("SampleDistribution-Filtered") shouldEqual Some(Map(0 -> 1)) + chrq.get("SampleDistribution-HomVar") shouldEqual Some(Map(0 -> 1)) + chrq.get("SampleDistribution-Variant") shouldEqual Some(Map(2 -> 1)) + + chrq.get("general") should not be empty + val general = chrq.get("general").get + + general.get("PolymorphicInSamples") shouldEqual Some(1) + general.get("ComplexIndel") shouldEqual Some(0) + general.get("FullyDecoded") shouldEqual Some(0) + general.get("PointEvent") shouldEqual Some(0) + general.get("MNP") shouldEqual Some(0) + general.get("Indel") shouldEqual Some(1) + general.get("Biallelic") shouldEqual Some(1) + general.get("SimpleDeletion") shouldEqual Some(0) + general.get("Variant") shouldEqual Some(1) + general.get("SymbolicOrSV") shouldEqual Some(0) + general.get("MonomorphicInSamples") shouldEqual Some(0) + general.get("SNP") shouldEqual Some(0) + general.get("Filtered") shouldEqual Some(0) + general.get("StructuralIndel") shouldEqual Some(0) + general.get("Total") shouldEqual Some(1) + general.get("Mixed") shouldEqual Some(0) + general.get("NotFiltered") shouldEqual Some(1) + general.get("Symbolic") shouldEqual Some(0) + general.get("SimpleInsertion") shouldEqual Some(1) + + val total = blah.get("total").get + total.get("SampleDistribution-NonInformative") shouldEqual Some(Map(0 -> 1)) + total.get("SampleDistribution-Called") shouldEqual Some(Map(3 -> 1)) + total.get("SampleDistribution-Mixed") shouldEqual 
Some(Map(0 -> 1)) + total.get("SampleDistribution-Hom") shouldEqual Some(Map(1 -> 1)) + total.get("SampleDistribution-HomRef") shouldEqual Some(Map(1 -> 1)) + total.get("SampleDistribution-Available") shouldEqual Some(Map(3 -> 1)) + total.get("QUAL") shouldEqual Some(Map(1541 -> 1)) + total.get("SampleDistribution-HetNonRef") shouldEqual Some(Map(0 -> 1)) + total.get("SampleDistribution-Het") shouldEqual Some(Map(2 -> 1)) + total.get("SampleDistribution-NoCall") shouldEqual Some(Map(0 -> 1)) + total.get("SampleDistribution-Filtered") shouldEqual Some(Map(0 -> 1)) + total.get("SampleDistribution-HomVar") shouldEqual Some(Map(0 -> 1)) + total.get("SampleDistribution-Variant") shouldEqual Some(Map(2 -> 1)) + + chrq.get("general") should not be empty + val totGeneral = total.get("general").get + + totGeneral.get("PolymorphicInSamples") shouldEqual Some(1) + totGeneral.get("ComplexIndel") shouldEqual Some(0) + totGeneral.get("FullyDecoded") shouldEqual Some(0) + totGeneral.get("PointEvent") shouldEqual Some(0) + totGeneral.get("MNP") shouldEqual Some(0) + totGeneral.get("Indel") shouldEqual Some(1) + totGeneral.get("Biallelic") shouldEqual Some(1) + totGeneral.get("SimpleDeletion") shouldEqual Some(0) + totGeneral.get("Variant") shouldEqual Some(1) + totGeneral.get("SymbolicOrSV") shouldEqual Some(0) + totGeneral.get("MonomorphicInSamples") shouldEqual Some(0) + totGeneral.get("SNP") shouldEqual Some(0) + totGeneral.get("Filtered") shouldEqual Some(0) + totGeneral.get("StructuralIndel") shouldEqual Some(0) + totGeneral.get("Total") shouldEqual Some(1) + totGeneral.get("Mixed") shouldEqual Some(0) + totGeneral.get("NotFiltered") shouldEqual Some(1) + totGeneral.get("Symbolic") shouldEqual Some(0) + totGeneral.get("SimpleInsertion") shouldEqual Some(1) + } + + @Test + def testCheckGenotype = { + val record = new VCFFileReader(new File(resourcePath("/chrQ.vcf.gz"))).iterator().next() + + val genotype = record.getGenotype(0) + + val blah = checkGenotype(record, genotype, 
List()) + + blah.get("chrQ") should not be empty + blah.get("total") should not be empty + + val chrq = blah.get("chrQ").get + chrq.get("GQ") shouldEqual Some(Map(99 -> 1)) + chrq.get("AD") shouldEqual Some(Map(24 -> 1, 21 -> 1)) + chrq.get("AD-used") shouldEqual Some(Map(24 -> 1, 21 -> 1)) + chrq.get("DP") shouldEqual Some(Map(45 -> 1)) + chrq.get("AD-alt") shouldEqual Some(Map(21 -> 1)) + chrq.get("AD-ref") shouldEqual Some(Map(24 -> 1)) + chrq.get("general") should not be empty + + val general = chrq.get("general").get + general.get("Hom") shouldEqual Some(0) + general.get("NoCall") shouldEqual Some(0) + general.get("Variant") shouldEqual Some(1) + general.get("Filtered") shouldEqual Some(0) + general.get("NonInformative") shouldEqual Some(0) + general.get("Called") shouldEqual Some(1) + general.get("Total") shouldEqual Some(1) + general.get("HomVar") shouldEqual Some(0) + general.get("HomRef") shouldEqual Some(0) + general.get("Mixed") shouldEqual Some(0) + general.get("Available") shouldEqual Some(1) + general.get("Het") shouldEqual Some(1) + general.get("HetNonRef") shouldEqual Some(0) + + val total = blah.get("total").get + total.get("GQ") shouldEqual Some(Map(99 -> 1)) + total.get("AD") shouldEqual Some(Map(24 -> 1, 21 -> 1)) + total.get("AD-used") shouldEqual Some(Map(24 -> 1, 21 -> 1)) + total.get("DP") shouldEqual Some(Map(45 -> 1)) + total.get("AD-alt") shouldEqual Some(Map(21 -> 1)) + total.get("AD-ref") shouldEqual Some(Map(24 -> 1)) + total.get("general") should not be empty + + val totGeneral = total.get("general").get + totGeneral.get("Hom") shouldEqual Some(0) + totGeneral.get("NoCall") shouldEqual Some(0) + totGeneral.get("Variant") shouldEqual Some(1) + totGeneral.get("Filtered") shouldEqual Some(0) + totGeneral.get("NonInformative") shouldEqual Some(0) + totGeneral.get("Called") shouldEqual Some(1) + totGeneral.get("Total") shouldEqual Some(1) + totGeneral.get("HomVar") shouldEqual Some(0) + totGeneral.get("HomRef") shouldEqual Some(0) + 
totGeneral.get("Mixed") shouldEqual Some(0) + totGeneral.get("Available") shouldEqual Some(1) + totGeneral.get("Het") shouldEqual Some(1) + totGeneral.get("HetNonRef") shouldEqual Some(0) + } } diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VcfWithVcfTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VcfWithVcfTest.scala index 9996dfed3befed4e8913ee1377d04953607dfbc9..af8ff6267d5c73dad9eaa10762b1d356a2641626 100644 --- a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VcfWithVcfTest.scala +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VcfWithVcfTest.scala @@ -17,13 +17,18 @@ package nl.lumc.sasc.biopet.tools import java.io.File import java.nio.file.Paths +import java.util +import htsjdk.variant.vcf.VCFFileReader import org.scalatest.Matchers import org.scalatest.mock.MockitoSugar import org.scalatest.testng.TestNGSuite import org.testng.annotations.Test import scala.util.Random +import scala.collection.JavaConversions._ + +import nl.lumc.sasc.biopet.utils.VcfUtils.identicalVariantContext /** * Test class for [[VcfWithVcfTest]] @@ -38,7 +43,7 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers { } val veppedPath = resourcePath("/VEP_oneline.vcf.gz") - val unveppedPath = resourcePath("/unvepped.vcf.gz") + val unveppedPath = resourcePath("/unvep_online.vcf.gz") val rand = new Random() @Test def testOutputTypeVcf() = { @@ -48,15 +53,129 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers { } @Test def testOutputTypeVcfGz() = { - val tmpPath = File.createTempFile("VcfWithVcf_", ".vcf").getAbsolutePath + val tmpPath = File.createTempFile("VcfWithVcf_", ".vcf.gz").getAbsolutePath val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpPath, "-f", "CSQ") main(arguments) } @Test def testOutputTypeBcf() = { - val tmpPath = File.createTempFile("VcfWithVcf_", ".vcf").getAbsolutePath + val tmpPath = 
File.createTempFile("VcfWithVcf_", ".bcf").getAbsolutePath val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpPath, "-f", "CSQ") main(arguments) } + @Test def testOutputFieldException = { + val tmpPath = File.createTempFile("VCFWithVCf", ".vcf").getAbsolutePath + val args = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpPath, "-f", "CSQ:AC") + an[IllegalArgumentException] should be thrownBy main(args) + val thrown = the[IllegalArgumentException] thrownBy main(args) + thrown.getMessage should equal("Field 'AC' already exists in input vcf") + } + + @Test def testInputFieldException = { + val tmpPath = File.createTempFile("VCFWithVCf", ".vcf").getAbsolutePath + val args = Array("-I", unveppedPath, "-s", unveppedPath, "-o", tmpPath, "-f", "CSQ:NEW_CSQ") + an[IllegalArgumentException] should be thrownBy main(args) + val thrown = the[IllegalArgumentException] thrownBy main(args) + thrown.getMessage should equal("Field 'CSQ' does not exist in secondary vcf") + } + + @Test def testMinMethodException = { + val tmpPath = File.createTempFile("VcfWithVcf_", ".vcf").getAbsolutePath + val args = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpPath, "-f", "CSQ:CSQ:min") + an[IllegalArgumentException] should be thrownBy main(args) + val thrown = the[IllegalArgumentException] thrownBy main(args) + thrown.getMessage should equal("Type of field CSQ is not numeric") + } + + @Test def testMaxMethodException = { + val tmpPath = File.createTempFile("VcfWithVcf_", ".vcf").getAbsolutePath + val args = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpPath, "-f", "CSQ:CSQ:max") + an[IllegalArgumentException] should be thrownBy main(args) + val thrown = the[IllegalArgumentException] thrownBy main(args) + thrown.getMessage should equal("Type of field CSQ is not numeric") + } + + @Test + def testFieldMap = { + val unvep_record = new VCFFileReader(new File(unveppedPath)).iterator().next() + + var fields = List(new Fields("FG", "FG")) + fields :::= List(new 
Fields("FD", "FD")) + fields :::= List(new Fields("GM", "GM")) + fields :::= List(new Fields("GL", "GL")) + fields :::= List(new Fields("CP", "CP")) + fields :::= List(new Fields("CG", "CG")) + fields :::= List(new Fields("CN", "CN")) + fields :::= List(new Fields("DSP", "DSP")) + fields :::= List(new Fields("AC", "AC")) + fields :::= List(new Fields("AF", "AF")) + fields :::= List(new Fields("AN", "AN")) + fields :::= List(new Fields("BaseQRankSum", "BaseQRankSum")) + fields :::= List(new Fields("DP", "DP")) + fields :::= List(new Fields("FS", "FS")) + fields :::= List(new Fields("MLEAC", "MLEAC")) + fields :::= List(new Fields("MLEAF", "MLEAF")) + fields :::= List(new Fields("MQ", "MQ")) + fields :::= List(new Fields("MQ0", "MQ0")) + fields :::= List(new Fields("MQRankSum", "MQRankSum")) + fields :::= List(new Fields("QD", "QD")) + fields :::= List(new Fields("RPA", "RPA")) + fields :::= List(new Fields("RU", "RU")) + fields :::= List(new Fields("ReadPosRankSum", "ReadPosRankSum")) + fields :::= List(new Fields("VQSLOD", "VQSLOD")) + fields :::= List(new Fields("culprit", "culprit")) + + val fieldMap = createFieldMap(fields, List(unvep_record)) + + fieldMap("FG") shouldBe List("intron") + fieldMap("FD") shouldBe List("unknown") + fieldMap("GM") shouldBe List("NM_152486.2") + fieldMap("GL") shouldBe List("SAMD11") + fieldMap("CP") shouldBe List("0.000") + fieldMap("CG") shouldBe List("-1.630") + fieldMap("CN") shouldBe List("2294", "3274", "30362", "112930") + fieldMap("DSP") shouldBe List("107") + fieldMap("AC") shouldBe List("2") + fieldMap("AF") shouldBe List("0.333") + fieldMap("AN") shouldBe List("6") + fieldMap("DP") shouldBe List("124") + fieldMap("FS") shouldBe List("1.322") + fieldMap("MLEAC") shouldBe List("2") + fieldMap("MLEAF") shouldBe List("0.333") + fieldMap("MQ") shouldBe List("60.0") + fieldMap("MQ0") shouldBe List("0") + fieldMap("MQRankSum") shouldBe List("-0.197") + fieldMap("QD") shouldBe List("19.03") + fieldMap("RPA") shouldBe List("1", 
"2") + fieldMap("RU") shouldBe List("A") + fieldMap("ReadPosRankSum") shouldBe List("-0.424") + fieldMap("VQSLOD") shouldBe List("0.079") + fieldMap("culprit") shouldBe List("FS") + + } + + @Test def testGetSecondaryRecords = { + val unvep_record = new VCFFileReader(new File(unveppedPath)).iterator().next() + val vep_reader = new VCFFileReader(new File(veppedPath)) + val vep_record = vep_reader.iterator().next() + + val secRec = getSecondaryRecords(vep_reader, unvep_record, false) + + secRec.foreach(x => identicalVariantContext(x, vep_record) shouldBe true) + } + + @Test def testCreateRecord = { + val unvep_record = new VCFFileReader(new File(unveppedPath)).iterator().next() + val vep_reader = new VCFFileReader(new File(veppedPath)) + val header = vep_reader.getFileHeader + val vep_record = vep_reader.iterator().next() + + val secRec = getSecondaryRecords(vep_reader, unvep_record, false) + + val fieldMap = createFieldMap(List(new Fields("CSQ", "CSQ")), secRec) + val created_record = createRecord(fieldMap, unvep_record, List(new Fields("CSQ", "CSQ")), header) + identicalVariantContext(created_record, vep_record) shouldBe true + } + } diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VepNormalizerTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VepNormalizerTest.scala index b2a063f9853c07ceb23f623b174e5d7980dc275f..5a12e9b579296b58a2764fdaa0b5b916717e384f 100644 --- a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VepNormalizerTest.scala +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VepNormalizerTest.scala @@ -47,37 +47,37 @@ class VepNormalizerTest extends TestNGSuite with MockitoSugar with Matchers { val rand = new Random() @Test def testGzOutputExplode(): Unit = { - val tmp_path = "/tmp/VepNorm_" + rand.nextString(10) + ".vcf.gz" - val arguments: Array[String] = Array("-I", vepped_path, "-O", tmp_path, "-m", "explode") + val tmpPath = 
File.createTempFile("VepNormalizer_", ".vcf.gz").getAbsolutePath + val arguments: Array[String] = Array("-I", vepped_path, "-O", tmpPath, "-m", "explode") main(arguments) } @Test def testVcfOutputExplode(): Unit = { - val tmp_path = "/tmp/VepNorm_" + rand.nextString(10) + ".vcf" - val arguments: Array[String] = Array("-I", vepped_path, "-O", tmp_path, "-m", "explode") + val tmpPath = File.createTempFile("VepNormalizer_", ".vcf").getAbsolutePath + val arguments: Array[String] = Array("-I", vepped_path, "-O", tmpPath, "-m", "explode") main(arguments) } @Test def testBcfOutputExplode(): Unit = { - val tmp_path = "/tmp/VepNorm_" + rand.nextString(10) + ".bcf" + val tmp_path = File.createTempFile("VepNormalizer_", ".bcf").getAbsolutePath val arguments: Array[String] = Array("-I", vepped_path, "-O", tmp_path, "-m", "explode") main(arguments) } @Test def testGzOutputStandard(): Unit = { - val tmp_path = "/tmp/VepNorm_" + rand.nextString(10) + ".vcf.gz" + val tmp_path = File.createTempFile("VepNormalizer_", ".vcf.gz").getAbsolutePath val arguments: Array[String] = Array("-I", vepped_path, "-O", tmp_path, "-m", "standard") main(arguments) } @Test def testVcfOutputStandard(): Unit = { - val tmp_path = "/tmp/VepNorm_" + rand.nextString(10) + ".vcf" + val tmp_path = File.createTempFile("VepNormalizer_", ".vcf").getAbsolutePath val arguments: Array[String] = Array("-I", vepped_path, "-O", tmp_path, "-m", "standard") main(arguments) } @Test def testBcfOutputStandard(): Unit = { - val tmp_path = "/tmp/VepNorm_" + rand.nextString(10) + ".bcf" + val tmp_path = File.createTempFile("VepNormalizer_", ".bcf").getAbsolutePath val arguments: Array[String] = Array("-I", vepped_path, "-O", tmp_path, "-m", "standard") main(arguments) }