diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/SquishBed.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/SquishBed.scala index 0cd432c6e7021d73b10f7906af2af7110f2b9432..b033fb45ed88fc8cea258d7ca19db3a2553e4e5c 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/SquishBed.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/SquishBed.scala @@ -40,6 +40,6 @@ object SquishBed { val squishBed = new SquishBed(root) squishBed.input = input squishBed.output = new File(outputDir, input.getName.stripSuffix(".bed") + ".squish.bed") - return squishBed + squishBed } } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/AnnotateVcfWithBed.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/AnnotateVcfWithBed.scala index 0f749e2832d69d8221c1ebbe68f3c301c7dc3ea4..2646ea9611df2a8d9f65c91b1a5131015ae4fd6a 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/AnnotateVcfWithBed.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/AnnotateVcfWithBed.scala @@ -26,13 +26,15 @@ import scala.collection.JavaConversions._ import scala.collection.mutable import scala.io.Source -/** - * Created by pjvan_thof on 1/10/15. - */ class AnnotateVcfWithBed { // TODO: Queue wrapper } +/** + * This a tools to annotate a vcf file with values from a bed file + * + * Created by pjvan_thof on 1/10/15. 
+ */ object AnnotateVcfWithBed extends ToolCommand { /** @@ -52,30 +54,29 @@ object AnnotateVcfWithBed extends ToolCommand { fieldType: String = "String") extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "inputFile") required () unbounded () valueName ("<vcf file>") action { (x, c) => + opt[File]('I', "inputFile") required () unbounded () valueName "<vcf file>" action { (x, c) => c.copy(inputFile = x) - } text ("Input is a required file property") - opt[File]('B', "bedFile") required () unbounded () valueName ("<bed file>") action { (x, c) => + } text "Input is a required file property" + opt[File]('B', "bedFile") required () unbounded () valueName "<bed file>" action { (x, c) => c.copy(bedFile = x) - } text ("Bedfile is a required file property") - opt[File]('o', "output") required () unbounded () valueName ("<vcf file>") action { (x, c) => + } text "Bedfile is a required file property" + opt[File]('o', "output") required () unbounded () valueName "<vcf file>" action { (x, c) => c.copy(outputFile = x) - } text ("out is a required file property") - opt[String]('f', "fieldName") required () unbounded () valueName ("<name of field in vcf file>") action { (x, c) => + } text "out is a required file property" + opt[String]('f', "fieldName") required () unbounded () valueName "<name of field in vcf file>" action { (x, c) => c.copy(fieldName = x) - } text ("Name of info field in new vcf file") - opt[String]('d', "fieldDescription") unbounded () valueName ("<name of field in vcf file>") action { (x, c) => + } text "Name of info field in new vcf file" + opt[String]('d', "fieldDescription") unbounded () valueName "<name of field in vcf file>" action { (x, c) => c.copy(fieldDescription = x) - } text ("Description of field in new vcf file") - opt[String]('t', "fieldType") unbounded () valueName ("<name of field in vcf file>") action { (x, c) => + } text "Description of field in new vcf file" + opt[String]('t', "fieldType") unbounded () 
valueName "<name of field in vcf file>" action { (x, c) => c.copy(fieldType = x) - } text ("Description of field in new vcf file") + } text "Description of field in new vcf file" } /** - * Program will Annotate a vcf file with the overlapping regions of a bed file, 4e column of the bed file we in a info tag in the vcf file - * - * @param args + * Program will Annotate a vcf file with the overlapping regions of a bed file, + * 4e column of the bed file we in a info tag in the vcf file */ def main(args: Array[String]): Unit = { @@ -108,7 +109,7 @@ object AnnotateVcfWithBed extends ToolCommand { val values = line.split("\t") if (values.size >= 4) bedRecords(values(0)) = (values(1).toInt, values(2).toInt, values(3)) :: bedRecords.getOrElse(values(0), Nil) - else (values.size >= 3 && fieldType == VCFHeaderLineType.Flag) + else values.size >= 3 && fieldType == VCFHeaderLineType.Flag bedRecords(values(0)) = (values(1).toInt, values(2).toInt, "") :: bedRecords.getOrElse(values(0), Nil) } @@ -148,8 +149,8 @@ object AnnotateVcfWithBed extends ToolCommand { writer.add(builder.make) } } - reader.close - writer.close + reader.close() + writer.close() logger.info("Done") } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala index 53872b2caa8f8a68f7753f3bd605025f04883619..62cf74f8e6a0669666aa5b00adbed576b60ca827 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala @@ -58,8 +58,8 @@ class BastyGenerateFasta(val root: Configurable) extends ToolCommandFuntion with override val defaultCoreMemory = 4.0 - override def beforeGraph: Unit = { - super.beforeGraph + override def beforeGraph(): Unit = { + super.beforeGraph() reference = referenceFasta() } @@ -91,43 +91,43 @@ object BastyGenerateFasta extends 
ToolCommand { reference: File = null) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('V', "inputVcf") unbounded () valueName ("<file>") action { (x, c) => + opt[File]('V', "inputVcf") unbounded () valueName "<file>" action { (x, c) => c.copy(inputVcf = x) - } text ("vcf file, needed for outputVariants and outputConsensusVariants") validate { x => + } text "vcf file, needed for outputVariants and outputConsensusVariants" validate { x => if (x.exists) success else failure("File does not exist: " + x) } - opt[File]("bamFile") unbounded () valueName ("<file>") action { (x, c) => + opt[File]("bamFile") unbounded () valueName "<file>" action { (x, c) => c.copy(bamFile = x) - } text ("bam file, needed for outputConsensus and outputConsensusVariants") validate { x => + } text "bam file, needed for outputConsensus and outputConsensusVariants" validate { x => if (x.exists) success else failure("File does not exist: " + x) } - opt[File]("outputVariants") maxOccurs (1) unbounded () valueName ("<file>") action { (x, c) => + opt[File]("outputVariants") maxOccurs 1 unbounded () valueName "<file>" action { (x, c) => c.copy(outputVariants = x) - } text ("fasta with only variants from vcf file") - opt[File]("outputConsensus") maxOccurs (1) unbounded () valueName ("<file>") action { (x, c) => + } text "fasta with only variants from vcf file" + opt[File]("outputConsensus") maxOccurs 1 unbounded () valueName "<file>" action { (x, c) => c.copy(outputConsensus = x) - } text ("Consensus fasta from bam, always reference bases else 'N'") - opt[File]("outputConsensusVariants") maxOccurs (1) unbounded () valueName ("<file>") action { (x, c) => + } text "Consensus fasta from bam, always reference bases else 'N'" + opt[File]("outputConsensusVariants") maxOccurs 1 unbounded () valueName "<file>" action { (x, c) => c.copy(outputConsensusVariants = x) - } text ("Consensus fasta from bam with variants from vcf file, always reference bases else 'N'") + } text "Consensus 
fasta from bam with variants from vcf file, always reference bases else 'N'" opt[Unit]("snpsOnly") unbounded () action { (x, c) => c.copy(snpsOnly = true) - } text ("Only use snps from vcf file") + } text "Only use snps from vcf file" opt[String]("sampleName") unbounded () action { (x, c) => c.copy(sampleName = x) - } text ("Sample name in vcf file") + } text "Sample name in vcf file" opt[String]("outputName") required () unbounded () action { (x, c) => c.copy(outputName = x) - } text ("Output name in fasta file header") + } text "Output name in fasta file header" opt[Int]("minAD") unbounded () action { (x, c) => c.copy(minAD = x) - } text ("min AD value in vcf file for sample. Defaults to: 8") + } text "min AD value in vcf file for sample. Defaults to: 8" opt[Int]("minDepth") unbounded () action { (x, c) => c.copy(minDepth = x) - } text ("min depth in bam file. Defaults to: 8") + } text "min depth in bam file. Defaults to: 8" opt[File]("reference") unbounded () action { (x, c) => c.copy(reference = x) - } text ("Indexed reference fasta file") validate { x => + } text "Indexed reference fasta file" validate { x => if (x.exists) success else failure("File does not exist: " + x) } @@ -188,7 +188,7 @@ object BastyGenerateFasta extends ToolCommand { val variants: Map[(Int, Int), VariantContext] = if (cmdArgs.inputVcf != null) { val reader = new VCFFileReader(cmdArgs.inputVcf, true) - (for (variant <- reader.query(chrName, begin, end) if (!cmdArgs.snpsOnly || variant.isSNP)) yield { + (for (variant <- reader.query(chrName, begin, end) if !cmdArgs.snpsOnly || variant.isSNP) yield { (variant.getStart, variant.getEnd) -> variant }).toMap } else Map() @@ -202,10 +202,10 @@ object BastyGenerateFasta extends ToolCommand { for (t <- s to e) coverage(t - begin) += 1 } } else { - for (t <- 0 until coverage.length) coverage(t) = cmdArgs.minDepth + for (t <- coverage.indices) coverage(t) = cmdArgs.minDepth } - val consensus = for (t <- 0 until coverage.length) yield { + val 
consensus = for (t <- coverage.indices) yield { if (coverage(t) >= cmdArgs.minDepth) referenceSequence.getBases()(t).toChar else 'N' } @@ -222,7 +222,7 @@ object BastyGenerateFasta extends ToolCommand { val stripSufix = if (variant.get._1._2 > end) variant.get._1._2 - end else 0 val allele = getMaxAllele(variant.get._2) consensusPos += variant.get._2.getReference.getBases.length - buffer.append(allele.substring(stripPrefix, allele.size - stripSufix)) + buffer.append(allele.substring(stripPrefix, allele.length - stripSufix)) } else { buffer.append(consensus(consensusPos)) consensusPos += 1 @@ -230,7 +230,7 @@ object BastyGenerateFasta extends ToolCommand { } } - (chunk -> (consensus.mkString.toUpperCase, buffer.toString.toUpperCase)) + chunk -> (consensus.mkString.toUpperCase, buffer.toString().toUpperCase) }).toMap if (cmdArgs.outputConsensus != null) { val writer = new PrintWriter(cmdArgs.outputConsensus) @@ -239,7 +239,7 @@ object BastyGenerateFasta extends ToolCommand { writer.print(chunks(c)._1) } writer.println() - writer.close + writer.close() } if (cmdArgs.outputConsensusVariants != null) { val writer = new PrintWriter(cmdArgs.outputConsensusVariants) @@ -248,7 +248,7 @@ object BastyGenerateFasta extends ToolCommand { writer.print(chunks(c)._2) } writer.println() - writer.close + writer.close() } } } @@ -257,12 +257,12 @@ object BastyGenerateFasta extends ToolCommand { val writer = new PrintWriter(cmdArgs.outputVariants) writer.println(">" + cmdArgs.outputName) val vcfReader = new VCFFileReader(cmdArgs.inputVcf, false) - for (vcfRecord <- vcfReader if (!cmdArgs.snpsOnly || vcfRecord.isSNP)) yield { + for (vcfRecord <- vcfReader if !cmdArgs.snpsOnly || vcfRecord.isSNP) yield { writer.print(getMaxAllele(vcfRecord)) } writer.println() - writer.close - vcfReader.close + writer.close() + vcfReader.close() } protected def getMaxAllele(vcfRecord: VariantContext): String = { @@ -276,6 +276,6 @@ object BastyGenerateFasta extends ToolCommand { if (AD == null) return 
fillAllele("", maxSize) val maxADid = AD.zipWithIndex.maxBy(_._1)._2 if (AD(maxADid) < cmdArgs.minAD) return fillAllele("", maxSize) - return fillAllele(vcfRecord.getAlleles()(maxADid).getBaseString, maxSize) + fillAllele(vcfRecord.getAlleles()(maxADid).getBaseString, maxSize) } } \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BedToInterval.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BedToInterval.scala index 76dae2616415147039d8c4ba8c31a64e3468df0e..f396cb3d3ca6417ce1533555f8f2223a0c6060d7 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BedToInterval.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BedToInterval.scala @@ -53,19 +53,19 @@ object BedToInterval extends ToolCommand { bedToInterval.input = inputBed bedToInterval.bamFile = inputBam bedToInterval.output = output - return bedToInterval + bedToInterval } case class Args(inputFile: File = null, outputFile: File = null, bamFile: File = null) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "inputFile") required () valueName ("<file>") action { (x, c) => + opt[File]('I', "inputFile") required () valueName "<file>" action { (x, c) => c.copy(inputFile = x) } - opt[File]('o', "output") required () valueName ("<file>") action { (x, c) => + opt[File]('o', "output") required () valueName "<file>" action { (x, c) => c.copy(outputFile = x) } - opt[File]('b', "bam") required () valueName ("<file>") action { (x, c) => + opt[File]('b', "bam") required () valueName "<file>" action { (x, c) => c.copy(bamFile = x) } } @@ -85,12 +85,12 @@ object BedToInterval extends ToolCommand { writer.write("@SQ\tSN:" + record.getSequenceName + "\tLN:" + record.getSequenceLength + "\n") record.getSequenceName -> record.getSequenceLength } - inputSam.close + inputSam.close() val refsMap = Map(refs: _*) val bedFile = Source.fromFile(commandArgs.inputFile) for ( - 
line <- bedFile.getLines; + line <- bedFile.getLines(); split = line.split("\t") if split.size >= 3; chr = split(0); start = split(1); @@ -102,7 +102,7 @@ object BedToInterval extends ToolCommand { else { var strand = "+" for (t <- 3 until split.length) { - if ((split(t) == "+" || split(t) == "-")) strand = split(t) + if (split(t) == "+" || split(t) == "-") strand = split(t) } writer.write(strand) } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BedtoolsCoverageToCounts.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BedtoolsCoverageToCounts.scala index 56cea947ce022426370169a4166cc322c7319d06..cea645c4607b9e2a5e8fd7a4b7980da320266ab0 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BedtoolsCoverageToCounts.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BedtoolsCoverageToCounts.scala @@ -21,8 +21,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.core.{ ToolCommand, ToolCommandFuntion } import org.broadinstitute.gatk.utils.commandline.{ Input, Output } -import scala.collection.SortedMap -import scala.collection.mutable.Map +import scala.collection.{ mutable, SortedMap } import scala.io.Source class BedtoolsCoverageToCounts(val root: Configurable) extends ToolCommandFuntion { @@ -45,10 +44,10 @@ object BedtoolsCoverageToCounts extends ToolCommand { case class Args(input: File = null, output: File = null) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "input") required () valueName ("<file>") action { (x, c) => + opt[File]('I', "input") required () valueName "<file>" action { (x, c) => c.copy(input = x) } - opt[File]('o', "output") required () unbounded () valueName ("<file>") action { (x, c) => + opt[File]('o', "output") required () unbounded () valueName "<file>" action { (x, c) => c.copy(output = x) } } @@ -62,8 +61,8 @@ object BedtoolsCoverageToCounts extends ToolCommand { if 
(!commandArgs.input.exists) throw new IllegalStateException("Input file not found, file: " + commandArgs.input) - val counts: Map[String, Long] = Map() - for (line <- Source.fromFile(commandArgs.input).getLines) { + val counts: mutable.Map[String, Long] = mutable.Map() + for (line <- Source.fromFile(commandArgs.input).getLines()) { val values = line.split("\t") val gene = values(3) val count = values(6).toLong @@ -77,6 +76,6 @@ object BedtoolsCoverageToCounts extends ToolCommand { for ((seq, count) <- sortedCounts) { if (count > 0) writer.println(seq + "\t" + count) } - writer.close + writer.close() } } \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala index 568e2321a4195ca4bc77bab59481cf7434528838..7340a5830a875c6d2ee5a4126d9af62a44203da8 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala @@ -25,6 +25,7 @@ import nl.lumc.sasc.biopet.utils.ConfigUtils import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import scala.collection.JavaConversions._ +import scala.collection.mutable class BiopetFlagstat(val root: Configurable) extends ToolCommandFuntion with Summarizable { javaMainClass = getClass.getName @@ -63,13 +64,13 @@ object BiopetFlagstat extends ToolCommand { case class Args(inputFile: File = null, summaryFile: Option[File] = None, region: Option[String] = None) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "inputFile") required () valueName ("<file>") action { (x, c) => + opt[File]('I', "inputFile") required () valueName "<file>" action { (x, c) => c.copy(inputFile = x) - } text ("input bam file") - opt[File]('s', "summaryFile") valueName ("<file>") action { (x, c) => + } text "input bam file" + opt[File]('s', 
"summaryFile") valueName "<file>" action { (x, c) => c.copy(summaryFile = Some(x)) - } text ("summary output file") - opt[String]('r', "region") valueName ("<chr:start-stop>") action { (x, c) => + } text "summary output file" + opt[String]('r', "region") valueName "<chr:start-stop>" action { (x, c) => c.copy(region = Some(x)) } } @@ -82,7 +83,7 @@ object BiopetFlagstat extends ToolCommand { val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) val inputSam = SamReaderFactory.makeDefault.open(commandArgs.inputFile) - val iterSam = if (commandArgs.region == None) inputSam.iterator else { + val iterSam = if (commandArgs.region.isEmpty) inputSam.iterator else { val regionRegex = """(.*):(.*)-(.*)""".r commandArgs.region.get match { case regionRegex(chr, start, stop) => inputSam.query(chr, start.toInt, stop.toInt, false) @@ -91,7 +92,7 @@ object BiopetFlagstat extends ToolCommand { } val flagstatCollector = new FlagstatCollector - flagstatCollector.loadDefaultFunctions + flagstatCollector.loadDefaultFunctions() val m = 10 val max = 60 for (t <- 0 to (max / m)) @@ -144,11 +145,10 @@ object BiopetFlagstat extends ToolCommand { } commandArgs.summaryFile.foreach { - case file => { + case file => val writer = new PrintWriter(file) writer.println(flagstatCollector.summary) writer.close() - } } println(flagstatCollector.report) @@ -157,12 +157,12 @@ object BiopetFlagstat extends ToolCommand { class FlagstatCollector { private var functionCount = 0 var readsCount = 0 - private val names: Map[Int, String] = Map() + private val names: mutable.Map[Int, String] = mutable.Map() private var functions: Array[SAMRecord => Boolean] = Array() private var totalCounts: Array[Long] = Array() private var crossCounts = Array.ofDim[Long](1, 1) - def loadDefaultFunctions { + def loadDefaultFunctions() { addFunction("All", record => true) addFunction("Mapped", record => !record.getReadUnmappedFlag) addFunction("Duplicates", record => record.getDuplicateReadFlag) @@ -207,7 
+207,7 @@ object BiopetFlagstat extends ToolCommand { crossCounts = Array.ofDim[Long](functionCount, functionCount) totalCounts = new Array[Long](functionCount) val temp = new Array[SAMRecord => Boolean](functionCount) - for (t <- 0 until (temp.size - 1)) temp(t) = functions(t) + for (t <- 0 until (temp.length - 1)) temp(t) = functions(t) functions = temp val index = functionCount - 1 @@ -228,7 +228,7 @@ object BiopetFlagstat extends ToolCommand { buffer.append(crossReport() + "\n") buffer.append(crossReport(fraction = true) + "\n") - return buffer.toString + buffer.toString() } def summary: String = { @@ -236,7 +236,7 @@ object BiopetFlagstat extends ToolCommand { names(t) -> totalCounts(t) }).toMap - return ConfigUtils.mapToJson(map).spaces4 + ConfigUtils.mapToJson(map).spaces4 } def crossReport(fraction: Boolean = false): String = { @@ -258,7 +258,7 @@ object BiopetFlagstat extends ToolCommand { else buffer.append("\t") } } - return buffer.toString + buffer.toString() } } // End of class diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala index 720bb36914b31381c40d16423a3bcd5ab3e48b0d..1059f7c4bcc3595bb5382fed8c5279b623695856 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala @@ -46,19 +46,19 @@ object CheckAllelesVcfInBam extends ToolCommand { bamFiles: List[File] = Nil, minMapQual: Int = 1) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "inputFile") required () maxOccurs (1) valueName ("<file>") action { (x, c) => + opt[File]('I', "inputFile") required () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(inputFile = x) } - opt[File]('o', "outputFile") required () maxOccurs (1) valueName ("<file>") action { (x, c) => + 
opt[File]('o', "outputFile") required () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(outputFile = x) } - opt[String]('s', "sample") unbounded () minOccurs (1) action { (x, c) => + opt[String]('s', "sample") unbounded () minOccurs 1 action { (x, c) => c.copy(samples = x :: c.samples) } - opt[File]('b', "bam") unbounded () minOccurs (1) action { (x, c) => + opt[File]('b', "bam") unbounded () minOccurs 1 action { (x, c) => c.copy(bamFiles = x :: c.bamFiles) } - opt[Int]('m', "min_mapping_quality") maxOccurs (1) action { (x, c) => + opt[Int]('m', "min_mapping_quality") maxOccurs 1 action { (x, c) => c.copy(minMapQual = c.minMapQual) } } @@ -98,7 +98,7 @@ object CheckAllelesVcfInBam extends ToolCommand { val refAllele = vcfRecord.getReference.getBaseString for ((sample, bamReader) <- bamReaders) { val queryInterval = new QueryInterval(bamHeaders(sample).getSequenceIndex(vcfRecord.getChr), - vcfRecord.getStart, vcfRecord.getStart + refAllele.size - 1) + vcfRecord.getStart, vcfRecord.getStart + refAllele.length - 1) val bamIter = bamReader.query(Array(queryInterval), false) def filterRead(samRecord: SAMRecord): Boolean = { @@ -112,7 +112,7 @@ object CheckAllelesVcfInBam extends ToolCommand { countReports(sample).lowMapQualReads += 1 return true } - return false + false } val counts = for (samRecord <- bamIter if !filterRead(samRecord)) { @@ -122,7 +122,7 @@ object CheckAllelesVcfInBam extends ToolCommand { case _ => countReports(sample).notFound += 1 } } - bamIter.close + bamIter.close() } val builder = new VariantContextBuilder(vcfRecord) @@ -137,49 +137,49 @@ object CheckAllelesVcfInBam extends ToolCommand { } writer.add(builder.make) } - for ((_, r) <- bamReaders) r.close - reader.close - writer.close + for ((_, r) <- bamReaders) r.close() + reader.close() + writer.close() } def checkAlles(samRecord: SAMRecord, vcfRecord: VariantContext): Option[String] = { val readStartPos = List.range(0, samRecord.getReadBases.length) .find(x => 
samRecord.getReferencePositionAtReadPosition(x + 1) == vcfRecord.getStart) getOrElse { return None } - val readBases = samRecord.getReadBases() + val readBases = samRecord.getReadBases val alleles = vcfRecord.getAlleles.map(x => x.getBaseString) val refAllele = alleles.head var maxSize = 1 - for (allele <- alleles if allele.size > maxSize) maxSize = allele.size + for (allele <- alleles if allele.length > maxSize) maxSize = allele.length val readC = for (t <- readStartPos until readStartPos + maxSize if t < readBases.length) yield readBases(t).toChar - val allelesInRead = mutable.Set(alleles.filter(readC.mkString.startsWith(_)): _*) + val allelesInRead = mutable.Set(alleles.filter(readC.mkString.startsWith): _*) // Removal of insertions that are not really in the cigarstring - for (allele <- allelesInRead if allele.size > refAllele.size) { - val refPos = for (t <- refAllele.size until allele.size) yield samRecord.getReferencePositionAtReadPosition(readStartPos + t + 1) + for (allele <- allelesInRead if allele.length > refAllele.length) { + val refPos = for (t <- refAllele.length until allele.length) yield samRecord.getReferencePositionAtReadPosition(readStartPos + t + 1) if (refPos.exists(_ > 0)) allelesInRead -= allele } // Removal of alleles that are not really in the cigarstring for (allele <- allelesInRead) { - val readPosAfterAllele = samRecord.getReferencePositionAtReadPosition(readStartPos + allele.size + 1) - val vcfPosAfterAllele = vcfRecord.getStart + refAllele.size + val readPosAfterAllele = samRecord.getReferencePositionAtReadPosition(readStartPos + allele.length + 1) + val vcfPosAfterAllele = vcfRecord.getStart + refAllele.length if (readPosAfterAllele != vcfPosAfterAllele && - (refAllele.size != allele.size || (refAllele.size == allele.size && readPosAfterAllele < 0))) allelesInRead -= allele + (refAllele.length != allele.length || (refAllele.length == allele.length && readPosAfterAllele < 0))) allelesInRead -= allele } - for (allele <- allelesInRead 
if allele.size >= refAllele.size) { - if (allelesInRead.exists(_.size > allele.size)) allelesInRead -= allele + for (allele <- allelesInRead if allele.length >= refAllele.length) { + if (allelesInRead.exists((_.length) > allele.length)) allelesInRead -= allele } - if (allelesInRead.contains(refAllele) && allelesInRead.exists(_.size < refAllele.size)) allelesInRead -= refAllele - if (allelesInRead.isEmpty) return None - else if (allelesInRead.size == 1) return Some(allelesInRead.head) + if (allelesInRead.contains(refAllele) && allelesInRead.exists((_.length) < refAllele.length)) allelesInRead -= refAllele + if (allelesInRead.isEmpty) None + else if (allelesInRead.size == 1) Some(allelesInRead.head) else { logger.warn("vcfRecord: " + vcfRecord) logger.warn("samRecord: " + samRecord.getSAMString) logger.warn("Found multiple options: " + allelesInRead.toString) logger.warn("ReadStartPos: " + readStartPos + " Read Length: " + samRecord.getReadLength) logger.warn("Read skipped, please report this") - return None + None } } } \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala index 219d4f7104355fc309d0821d53a683f3df2ff838..e8042553b5c32affee466c6372eeac29d1e5f7c6 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala @@ -229,9 +229,9 @@ object ExtractAlignedFastq extends ToolCommand { """.stripMargin) checkConfig { c => - if (c.inputFastq2 != None && c.outputFastq2 == None) + if (c.inputFastq2.isDefined && c.outputFastq2.isEmpty) failure("Missing output FASTQ file 2") - else if (c.inputFastq2 == None && c.outputFastq2 != None) + else if (c.inputFastq2.isEmpty && c.outputFastq2.isDefined) failure("Missing input FASTQ file 2") else success diff --git 
a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala index cffc5023c750b15f7fb390b945401d0d19da7b1a..beed86c64f2aa792af84bac2bd3439ff616dedca 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala @@ -53,12 +53,12 @@ object FastqSplitter extends ToolCommand { case class Args(inputFile: File = null, outputFile: List[File] = Nil) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "inputFile") required () valueName ("<file>") action { (x, c) => + opt[File]('I', "inputFile") required () valueName "<file>" action { (x, c) => c.copy(inputFile = x) - } text ("out is a required file property") - opt[File]('o', "output") required () unbounded () valueName ("<file>") action { (x, c) => + } text "out is a required file property" + opt[File]('o', "output") required () unbounded () valueName "<file>" action { (x, c) => c.copy(outputFile = x :: c.outputFile) - } text ("out is a required file property") + } text "out is a required file property" } /** @@ -87,7 +87,7 @@ object FastqSplitter extends ToolCommand { } } } - for (writer <- output) writer.close + for (writer <- output) writer.close() logger.info("Done, " + counter + " reads processed") } } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala index 9c7baf364ee741be7b7eb8ea9b2af22a4ac63bb5..dafa21f20d18ce0c0e364e17778b3547f0d2ed67 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala @@ -27,12 +27,12 @@ object FindRepeatsPacBio extends ToolCommand { case class 
Args(inputBam: File = null, inputBed: File = null) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "inputBam") required () maxOccurs (1) valueName ("<file>") action { (x, c) => + opt[File]('I', "inputBam") required () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(inputBam = x) } - opt[File]('b', "inputBed") required () maxOccurs (1) valueName ("<file>") action { (x, c) => + opt[File]('b', "inputBed") required () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(inputBed = x) - } text ("output file, default to stdout") + } text "output file, default to stdout" } /** @@ -53,7 +53,7 @@ object FindRepeatsPacBio extends ToolCommand { println(header.mkString("\t")) for ( - bedLine <- Source.fromFile(commandArgs.inputBed).getLines; + bedLine <- Source.fromFile(commandArgs.inputBed).getLines(); values = bedLine.split("\t"); if values.size >= 3 ) { val interval = new QueryInterval(bamHeader.getSequenceIndex(values(0)), values(1).toInt, values(2).toInt) @@ -78,7 +78,7 @@ object FindRepeatsPacBio extends ToolCommand { inserts ::= result.get.ins.map(_.insert).mkString(",") deletions ::= result.get.dels.map(_.length).mkString(",") val length = oriRepeatLength - result.get.beginDel - result.get.endDel - - ((0 /: result.get.dels.map(_.length))(_ + _)) + ((0 /: result.get.ins.map(_.insert.size))(_ + _)) + (0 /: result.get.dels.map(_.length))(_ + _) + (0 /: result.get.ins.map(_.insert.length))(_ + _) calcRepeatLength ::= length if (length > maxLength) maxLength = length if (length < minLength || minLength == -1) minLength = length @@ -86,7 +86,7 @@ object FindRepeatsPacBio extends ToolCommand { } println(List(chr, startPos, stopPos, typeRepeat, repeatLength, oriRepeatLength, calcRepeatLength.mkString(","), minLength, maxLength, inserts.mkString("/"), deletions.mkString("/"), notSpan).mkString("\t")) - bamIter.close + bamIter.close() } } @@ -135,6 +135,6 @@ object FindRepeatsPacBio extends ToolCommand { } } - return Some(result) + 
Some(result) } } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MergeAlleles.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MergeAlleles.scala index 4f972de13b29adb649bf3c1bfc3bc59b5b0b25b7..1ff699f1ad4de288f7019e0903cd748e9755bc47 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MergeAlleles.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MergeAlleles.scala @@ -26,7 +26,7 @@ import nl.lumc.sasc.biopet.core.{ ToolCommand, ToolCommandFuntion } import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import scala.collection.JavaConversions._ -import scala.collection.SortedMap +import scala.collection.{ mutable, SortedMap } import scala.collection.mutable.{ Map, Set } class MergeAlleles(val root: Configurable) extends ToolCommandFuntion { @@ -45,8 +45,8 @@ class MergeAlleles(val root: Configurable) extends ToolCommandFuntion { override val defaultCoreMemory = 1.0 - override def beforeGraph { - super.beforeGraph + override def beforeGraph() { + super.beforeGraph() if (output.getName.endsWith(".gz")) outputIndex = new File(output.getAbsolutePath + ".tbi") if (output.getName.endsWith(".vcf")) outputIndex = new File(output.getAbsolutePath + ".idx") } @@ -62,19 +62,19 @@ object MergeAlleles extends ToolCommand { val mergeAlleles = new MergeAlleles(root) mergeAlleles.input = input mergeAlleles.output = output - return mergeAlleles + mergeAlleles } case class Args(inputFiles: List[File] = Nil, outputFile: File = null, reference: File = null) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "inputVcf") minOccurs (2) required () unbounded () valueName ("<file>") action { (x, c) => + opt[File]('I', "inputVcf") minOccurs 2 required () unbounded () valueName "<file>" action { (x, c) => c.copy(inputFiles = x :: c.inputFiles) } - opt[File]('o', "outputVcf") required () unbounded () maxOccurs (1) valueName ("<file>") action { 
(x, c) => + opt[File]('o', "outputVcf") required () unbounded () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(outputFile = x) } - opt[File]('R', "reference") required () unbounded () maxOccurs (1) valueName ("<file>") action { (x, c) => + opt[File]('R', "reference") required () unbounded () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(reference = x) } } @@ -99,8 +99,8 @@ object MergeAlleles extends ToolCommand { header.setSequenceDictionary(referenceDict) writer.writeHeader(header) - for (chr <- referenceDict.getSequences; chunk <- (0 to (chr.getSequenceLength / chunkSize))) { - val output: Map[Int, List[VariantContext]] = Map() + for (chr <- referenceDict.getSequences; chunk <- 0 to (chr.getSequenceLength / chunkSize)) { + val output: mutable.Map[Int, List[VariantContext]] = mutable.Map() val chrName = chr.getSequenceName val begin = chunk * chunkSize + 1 @@ -119,17 +119,17 @@ object MergeAlleles extends ToolCommand { writer.add(mergeAlleles(v)) } } - writer.close + writer.close() readers.foreach(_.close) } def mergeAlleles(records: List[VariantContext]): VariantContext = { val longestRef = { var l: Array[Byte] = Array() - for (a <- records.map(_.getReference.getBases) if (a.length > l.size)) l = a + for (a <- records.map(_.getReference.getBases) if a.length > l.length) l = a Allele.create(l, true) } - val alleles: Set[Allele] = Set() + val alleles: mutable.Set[Allele] = mutable.Set() val builder = new VariantContextBuilder builder.chr(records.head.getChr) builder.start(records.head.getStart) diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MergeTables.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MergeTables.scala index 8703df78bc13ed7c06fa259950c807b11d191bc2..d9f6ce484a248666c4b8fc9a7a6f955f043a3c44 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MergeTables.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MergeTables.scala 
@@ -101,7 +101,7 @@ object MergeTables extends ToolCommand { val split = line .split(delimiter) .filter(_.nonEmpty) - val colSize = split.size + val colSize = split.length require(idIdces.forall(_ < colSize), "All feature ID indices must be smaller than number of columns") require(valIdx < colSize, "Value index must be smaller than number of columns") diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala index 4a1b41a0653423b388c30cdff18d45a16f1ad2a8..f48bdaf7dfb6b6ca5dcca84bc45c0a7c51a77ee8 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala @@ -25,6 +25,7 @@ import nl.lumc.sasc.biopet.utils.ConfigUtils import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import scala.collection.JavaConversions._ +import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.io.Source import scala.math.{ floor, round } @@ -53,19 +54,19 @@ class MpileupToVcf(val root: Configurable) extends ToolCommandFuntion with Refer override def defaults = ConfigUtils.mergeMaps(Map("samtoolsmpileup" -> Map("disable_baq" -> true, "min_map_quality" -> 1)), super.defaults) - override def beforeGraph { - super.beforeGraph + override def beforeGraph() { + super.beforeGraph() reference = referenceFasta().getAbsolutePath val samtoolsMpileup = new SamtoolsMpileup(this) } - override def beforeCmd: Unit = { + override def beforeCmd(): Unit = { if (sample == null && inputBam.exists()) { val inputSam = SamReaderFactory.makeDefault.open(inputBam) val readGroups = inputSam.getFileHeader.getReadGroups val samples = readGroups.map(readGroup => readGroup.getSample).distinct sample = samples.head - inputSam.close + inputSam.close() } } @@ -92,12 +93,12 @@ object MpileupToVcf extends ToolCommand { homoFraction: 
Double = 0.8, ploidy: Int = 2) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "input") valueName ("<file>") action { (x, c) => + opt[File]('I', "input") valueName "<file>" action { (x, c) => c.copy(input = x) - } text ("input, default is stdin") - opt[File]('o', "output") required () valueName ("<file>") action { (x, c) => + } text "input, default is stdin" + opt[File]('o', "output") required () valueName "<file>" action { (x, c) => c.copy(output = x) - } text ("out is a required file property") + } text "out is a required file property" opt[String]('s', "sample") required () action { (x, c) => c.copy(sample = x) } @@ -139,16 +140,15 @@ object MpileupToVcf extends ToolCommand { writer.println("##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">") writer.println("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + commandArgs.sample) val inputStream = if (commandArgs.input != null) { - Source.fromFile(commandArgs.input).getLines + Source.fromFile(commandArgs.input).getLines() } else { logger.info("No input file as argument, waiting on stdin") - Source.stdin.getLines + Source.stdin.getLines() } class Counts(var forward: Int, var reverse: Int) for ( line <- inputStream; - values = line.split("\t"); - if values.size > 5 + values = line.split("\t") if values.size > 5 ) { val chr = values(0) val pos = values(1) @@ -162,7 +162,7 @@ object MpileupToVcf extends ToolCommand { val mpileup = values(4) val qual = values(5) - val counts: Map[String, Counts] = Map(ref.toUpperCase -> new Counts(0, 0)) + val counts: mutable.Map[String, Counts] = mutable.Map(ref.toUpperCase -> new Counts(0, 0)) def addCount(s: String) { val upper = s.toUpperCase @@ -173,23 +173,20 @@ object MpileupToVcf extends ToolCommand { var t = 0 var dels = 0 - while (t < mpileup.size) { + while (t < mpileup.length) { mpileup(t) match { - case ',' => { + case ',' => addCount(ref.toLowerCase) t += 1 - } - case '.' => { + case '.' 
=> addCount(ref.toUpperCase) t += 1 - } case '^' => t += 2 case '$' => t += 1 - case '*' => { + case '*' => dels += 1 t += 1 - } - case '+' | '-' => { + case '+' | '-' => t += 1 var size = "" var insert = "" @@ -199,17 +196,15 @@ object MpileupToVcf extends ToolCommand { } for (c <- t until t + size.toInt) insert = insert + mpileup(c) t += size.toInt - } - case 'a' | 'c' | 't' | 'g' | 'A' | 'C' | 'T' | 'G' => { + case 'a' | 'c' | 't' | 'g' | 'A' | 'C' | 'T' | 'G' => addCount(mpileup(t).toString) t += 1 - } case _ => t += 1 } } val info: ArrayBuffer[String] = ArrayBuffer("DP=" + reads) - val format: Map[String, String] = Map("DP" -> reads.toString) + val format: mutable.Map[String, String] = mutable.Map("DP" -> reads.toString) val alt: ArrayBuffer[String] = new ArrayBuffer format += ("RFC" -> counts(ref.toUpperCase).forward.toString) format += ("RRC" -> counts(ref.toUpperCase).reverse.toString) @@ -223,14 +218,14 @@ object MpileupToVcf extends ToolCommand { round((value.forward + value.reverse).toDouble / reads * 1E4).toDouble / 1E2)) } - if (alt.size > 0) { + if (alt.nonEmpty) { val ad = for (ad <- format("AD").split(",")) yield ad.toInt var left = reads - dels val gt = ArrayBuffer[Int]() for (p <- 0 to alt.size if gt.size < commandArgs.ploidy) { var max = -1 - for (a <- 0 until ad.length if ad(a) > (if (max >= 0) ad(max) else -1) && !gt.exists(_ == a)) max = a + for (a <- ad.indices if ad(a) > (if (max >= 0) ad(max) else -1) && !gt.contains(a)) max = a val f = ad(max).toDouble / left for (a <- 0 to floor(f).toInt if gt.size < commandArgs.ploidy) gt.append(max) if (f - floor(f) >= commandArgs.homoFraction) { @@ -243,6 +238,6 @@ object MpileupToVcf extends ToolCommand { ).mkString("\t")) } } - writer.close + writer.close() } } \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/PrefixFastq.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/PrefixFastq.scala index 
da055614835c1e577bce53b2885f4858e1896fd9..0a325155ecd1393f72c995d0d0d17dcc564d971e 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/PrefixFastq.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/PrefixFastq.scala @@ -64,7 +64,7 @@ object PrefixFastq extends ToolCommand { val prefixFastq = new PrefixFastq(root) prefixFastq.inputFastq = input prefixFastq.outputFastq = new File(outputDir, input.getName + ".prefix.fastq") - return prefixFastq + prefixFastq } /** @@ -76,13 +76,13 @@ object PrefixFastq extends ToolCommand { case class Args(input: File = null, output: File = null, seq: String = null) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('i', "input") required () maxOccurs (1) valueName ("<file>") action { (x, c) => + opt[File]('i', "input") required () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(input = x) } - opt[File]('o', "output") required () maxOccurs (1) valueName ("<file>") action { (x, c) => + opt[File]('o', "output") required () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(output = x) } - opt[String]('s', "seq") required () maxOccurs (1) valueName ("<prefix seq>") action { (x, c) => + opt[String]('s', "seq") required () maxOccurs 1 valueName "<prefix seq>" action { (x, c) => c.copy(seq = x) } } @@ -110,7 +110,7 @@ object PrefixFastq extends ToolCommand { val readHeader = read.getReadHeader val readSeq = cmdArgs.seq + read.getReadString val baseQualityHeader = read.getBaseQualityHeader - val baseQuality = Array.fill(cmdArgs.seq.size)(maxQuality).mkString + read.getBaseQualityString + val baseQuality = Array.fill(cmdArgs.seq.length)(maxQuality).mkString + read.getBaseQualityString writer.write(new FastqRecord(readHeader, readSeq, baseQualityHeader, baseQuality)) diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCountFastq.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCountFastq.scala 
index 18cb7ab4dc5c1dcf6d695da15dc9600abb88c87b..79388e782bf18259271b478e4fefd7e917ade100 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCountFastq.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCountFastq.scala @@ -22,7 +22,7 @@ import nl.lumc.sasc.biopet.core.{ ToolCommand, ToolCommandFuntion } import org.biojava3.sequencing.io.fastq.{ Fastq, SangerFastqReader, StreamListener } import org.broadinstitute.gatk.utils.commandline.{ Input, Output } -import scala.collection.SortedMap +import scala.collection.{ mutable, SortedMap } import scala.collection.mutable.Map class SageCountFastq(val root: Configurable) extends ToolCommandFuntion { @@ -45,10 +45,10 @@ object SageCountFastq extends ToolCommand { case class Args(input: File = null, output: File = null) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "input") required () valueName ("<file>") action { (x, c) => + opt[File]('I', "input") required () valueName "<file>" action { (x, c) => c.copy(input = x) } - opt[File]('o', "output") required () unbounded () valueName ("<file>") action { (x, c) => + opt[File]('o', "output") required () unbounded () valueName "<file>" action { (x, c) => c.copy(output = x) } } @@ -62,7 +62,7 @@ object SageCountFastq extends ToolCommand { if (!commandArgs.input.exists) throw new IllegalStateException("Input file not found, file: " + commandArgs.input) - val counts: Map[String, Long] = Map() + val counts: mutable.Map[String, Long] = mutable.Map() val reader = new SangerFastqReader var count = 0 logger.info("Reading fastq file: " + commandArgs.input) @@ -86,6 +86,6 @@ object SageCountFastq extends ToolCommand { for ((seq, count) <- sortedCounts) { writer.println(seq + "\t" + count) } - writer.close + writer.close() } } \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateLibrary.scala 
b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateLibrary.scala index 4d8bc55d32d7e28f34f8e4e4a4058493c2c2af6b..efc9ccf439db1986d59b6e57c3c9b4649da41c26 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateLibrary.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateLibrary.scala @@ -24,7 +24,7 @@ import org.biojava3.core.sequence.io.FastaReaderHelper import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import scala.collection.JavaConversions._ -import scala.collection.SortedMap +import scala.collection.{ mutable, SortedMap } import scala.collection.mutable.{ Map, Set } import scala.util.matching.Regex @@ -65,10 +65,10 @@ object SageCreateLibrary extends ToolCommand { noAntiTagsOutput: File = null, allGenesOutput: File = null) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "input") required () unbounded () valueName ("<file>") action { (x, c) => + opt[File]('I', "input") required () unbounded () valueName "<file>" action { (x, c) => c.copy(input = x) } - opt[File]('o', "output") required () unbounded () valueName ("<file>") action { (x, c) => + opt[File]('o', "output") required () unbounded () valueName "<file>" action { (x, c) => c.copy(output = x) } opt[String]("tag") required () unbounded () action { (x, c) => @@ -77,13 +77,13 @@ object SageCreateLibrary extends ToolCommand { opt[Int]("length") required () unbounded () action { (x, c) => c.copy(length = x) } - opt[File]("noTagsOutput") required () unbounded () valueName ("<file>") action { (x, c) => + opt[File]("noTagsOutput") required () unbounded () valueName "<file>" action { (x, c) => c.copy(noTagsOutput = x) } - opt[File]("noAntiTagsOutput") required () unbounded () valueName ("<file>") action { (x, c) => + opt[File]("noAntiTagsOutput") required () unbounded () valueName "<file>" action { (x, c) => c.copy(noAntiTagsOutput = x) } - opt[File]("allGenesOutput") 
unbounded () valueName ("<file>") action { (x, c) => + opt[File]("allGenesOutput") unbounded () valueName "<file>" action { (x, c) => c.copy(allGenesOutput = x) } } @@ -91,17 +91,17 @@ object SageCreateLibrary extends ToolCommand { var tagRegex: Regex = null var geneRegex = """ENSG[0-9]{11}""".r - val tagGenesMap: Map[String, TagGenes] = Map() + val tagGenesMap: mutable.Map[String, TagGenes] = mutable.Map() - val allGenes: Set[String] = Set() - val tagGenes: Set[String] = Set() - val antiTagGenes: Set[String] = Set() + val allGenes: mutable.Set[String] = mutable.Set() + val tagGenes: mutable.Set[String] = mutable.Set() + val antiTagGenes: mutable.Set[String] = mutable.Set() class TagGenes { - val firstTag: Set[String] = Set() - val allTags: Set[String] = Set() - val firstAntiTag: Set[String] = Set() - val allAntiTags: Set[String] = Set() + val firstTag: mutable.Set[String] = mutable.Set() + val allTags: mutable.Set[String] = mutable.Set() + val firstAntiTag: mutable.Set[String] = mutable.Set() + val allAntiTags: mutable.Set[String] = mutable.Set() } class TagResult(val firstTag: String, val allTags: List[String], val firstAntiTag: String, val allAntiTags: List[String]) @@ -147,7 +147,7 @@ object SageCreateLibrary extends ToolCommand { for (gene <- allGenes if !tagGenes.contains(gene)) { writer.println(gene) } - writer.close + writer.close() } if (commandArgs.noAntiTagsOutput != null) { @@ -155,7 +155,7 @@ object SageCreateLibrary extends ToolCommand { for (gene <- allGenes if !antiTagGenes.contains(gene)) { writer.println(gene) } - writer.close + writer.close() } if (commandArgs.allGenesOutput != null) { @@ -163,7 +163,7 @@ object SageCreateLibrary extends ToolCommand { for (gene <- allGenes) { writer.println(gene) } - writer.close + writer.close() } } @@ -196,14 +196,14 @@ object SageCreateLibrary extends ToolCommand { } def getTags(name: String, seq: DNASequence): TagResult = { - val allTags: List[String] = for (tag <- 
tagRegex.findAllMatchIn(seq.getSequenceAsString).toList) yield tag.toString + val allTags: List[String] = for (tag <- tagRegex.findAllMatchIn(seq.getSequenceAsString).toList) yield tag.toString() val firstTag = if (allTags.isEmpty) null else allTags.last - val allAntiTags: List[String] = for (tag <- tagRegex.findAllMatchIn(seq.getReverseComplement.getSequenceAsString).toList) yield tag.toString + val allAntiTags: List[String] = for (tag <- tagRegex.findAllMatchIn(seq.getReverseComplement.getSequenceAsString).toList) yield tag.toString() val firstAntiTag = if (allAntiTags.isEmpty) null else allAntiTags.head val result = new TagResult(firstTag, allTags, firstAntiTag, allAntiTags) addTagresultToTaglib(name, result) - return result + result } } \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCounts.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCounts.scala index 925d8f2f6512e7a71c83a589e21187acc5bf91f5..d185fbc350e4bf8c8b4cbf920667163ac517855a 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCounts.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCounts.scala @@ -21,7 +21,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.core.{ ToolCommand, ToolCommandFuntion } import org.broadinstitute.gatk.utils.commandline.{ Input, Output } -import scala.collection.SortedMap +import scala.collection.{ mutable, SortedMap } import scala.collection.mutable.Map import scala.io.Source @@ -62,22 +62,22 @@ object SageCreateTagCounts extends ToolCommand { countAntiSense: File = null, countAllAntiSense: File = null) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "input") required () unbounded () valueName ("<file>") action { (x, c) => + opt[File]('I', "input") required () unbounded () valueName "<file>" action { (x, c) => 
c.copy(input = x) } - opt[File]('t', "tagLib") required () unbounded () valueName ("<file>") action { (x, c) => + opt[File]('t', "tagLib") required () unbounded () valueName "<file>" action { (x, c) => c.copy(tagLib = x) } - opt[File]("countSense") unbounded () valueName ("<file>") action { (x, c) => + opt[File]("countSense") unbounded () valueName "<file>" action { (x, c) => c.copy(countSense = x) } - opt[File]("countAllSense") unbounded () valueName ("<file>") action { (x, c) => + opt[File]("countAllSense") unbounded () valueName "<file>" action { (x, c) => c.copy(countAllSense = x) } - opt[File]("countAntiSense") unbounded () valueName ("<file>") action { (x, c) => + opt[File]("countAntiSense") unbounded () valueName "<file>" action { (x, c) => c.copy(countAntiSense = x) } - opt[File]("countAllAntiSense") unbounded () valueName ("<file>") action { (x, c) => + opt[File]("countAllAntiSense") unbounded () valueName "<file>" action { (x, c) => c.copy(countAllAntiSense = x) } } @@ -91,8 +91,8 @@ object SageCreateTagCounts extends ToolCommand { if (!commandArgs.input.exists) throw new IllegalStateException("Input file not found, file: " + commandArgs.input) - val rawCounts: Map[String, Long] = Map() - for (line <- Source.fromFile(commandArgs.input).getLines) { + val rawCounts: mutable.Map[String, Long] = mutable.Map() + for (line <- Source.fromFile(commandArgs.input).getLines()) { val values = line.split("\t") val gene = values(0) val count = values(1).toLong @@ -100,12 +100,12 @@ object SageCreateTagCounts extends ToolCommand { else rawCounts += gene -> count } - val senseCounts: Map[String, Long] = Map() - val allSenseCounts: Map[String, Long] = Map() - val antiSenseCounts: Map[String, Long] = Map() - val allAntiSenseCounts: Map[String, Long] = Map() + val senseCounts: mutable.Map[String, Long] = mutable.Map() + val allSenseCounts: mutable.Map[String, Long] = mutable.Map() + val antiSenseCounts: mutable.Map[String, Long] = mutable.Map() + val allAntiSenseCounts: 
mutable.Map[String, Long] = mutable.Map() - for (line <- Source.fromFile(commandArgs.tagLib).getLines if !line.startsWith("#")) { + for (line <- Source.fromFile(commandArgs.tagLib).getLines() if !line.startsWith("#")) { val values = line.split("\t") val tag = values(0) val sense = values(1) @@ -138,14 +138,14 @@ object SageCreateTagCounts extends ToolCommand { } } - def writeFile(file: File, counts: Map[String, Long]) { + def writeFile(file: File, counts: mutable.Map[String, Long]) { val sorted: SortedMap[String, Long] = SortedMap(counts.toArray: _*) if (file != null) { val writer = new PrintWriter(file) for ((gene, count) <- sorted) { if (count > 0) writer.println(gene + "\t" + count) } - writer.close + writer.close() } } writeFile(commandArgs.countSense, senseCounts) diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala index 8f03a2d85b280f2a13989330bb980283ed073bb7..46cd2b40cd8645a432aee54081f54e9dd0ec3351 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala @@ -29,22 +29,19 @@ object SamplesTsvToJson extends ToolCommand { case class Args(inputFiles: List[File] = Nil) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('i', "inputFiles") required () unbounded () valueName ("<file>") action { (x, c) => + opt[File]('i', "inputFiles") required () unbounded () valueName "<file>" action { (x, c) => c.copy(inputFiles = x :: c.inputFiles) - } text ("Input must be a tsv file, first line is seen as header and must at least have a 'sample' column, 'library' column is optional, multiple files allowed") + } text "Input must be a tsv file, first line is seen as header and must at least have a 'sample' column, 'library' column is optional, multiple files allowed" } - /** - * 
Executes SamplesTsvToJson - * @param args - */ + /** Executes SamplesTsvToJson */ def main(args: Array[String]): Unit = { val argsParser = new OptParser val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) val fileMaps = for (inputFile <- commandArgs.inputFiles) yield { val reader = Source.fromFile(inputFile) - val lines = reader.getLines.toList.filter(!_.isEmpty) + val lines = reader.getLines().toList.filter(!_.isEmpty) val header = lines.head.split("\t") val sampleColumn = header.indexOf("sample") val libraryColumn = header.indexOf("library") @@ -55,9 +52,8 @@ object SamplesTsvToJson extends ToolCommand { val sample = values(sampleColumn) val library = if (libraryColumn != -1) values(libraryColumn) else null val valuesMap = (for ( - t <- 0 until values.size; - if !values(t).isEmpty && t != sampleColumn && t != libraryColumn - ) yield (header(t) -> values(t))).toMap + t <- 0 until values.size if !values(t).isEmpty && t != sampleColumn && t != libraryColumn + ) yield header(t) -> values(t)).toMap val map: Map[String, Any] = if (library != null) { Map("samples" -> Map(sample -> Map("libraries" -> Map(library -> valuesMap)))) } else { diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala index d04ba91a0d788556b0f63d71ec8239bccc839d36..f232117acef51c5a5c1f09a1143cb318dc402f21 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala @@ -227,7 +227,7 @@ object SeqStat extends ToolCommand { def summarize(): Unit = { // for every position to the max length of any read - for (pos <- 0 until baseStats.length) { + for (pos <- baseStats.indices) { // list all qualities at this particular position `pos` // fix the length of `quals` if (quals.length <= baseStats(pos).qual.length) { @@ -244,7 +244,7 @@ object SeqStat 
extends ToolCommand { detectPhredEncoding(quals) logger.debug("Detected '" + phredEncoding.toString.toLowerCase + "' encoding in fastq file ...") - for (pos <- 0 until nucs.length) { + for (pos <- nucs.indices) { // always export the N-nucleotide if (nucs(pos) > 0 || pos.toChar == 'N') { nucleotideHistoMap += (pos.toChar -> nucs(pos)) @@ -257,14 +257,14 @@ object SeqStat extends ToolCommand { readHistogram.append(0) } - for (pos <- 0 until quals.length) { + for (pos <- quals.indices) { val key: Int = pos - phredEncoding.id if (key >= 0) { baseHistogram(key) += quals(pos) } } - for (pos <- 0 until readStats.qual.length) { + for (pos <- readStats.qual.indices) { val key: Int = pos - phredEncoding.id if (key > 0) { // count till the max of baseHistogram.length @@ -274,7 +274,7 @@ object SeqStat extends ToolCommand { } } - for (pos <- 0 until readHistogram.length) { + for (pos <- readHistogram.indices) { readQualHistoMap += (pos -> readHistogram(pos)) } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SummaryToTsv.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SummaryToTsv.scala index 731e7039e2092d927d2a0ebe5146371a50453d5b..56a9f7289b58138e7dfad2f944a5e9b526a6dd08 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SummaryToTsv.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SummaryToTsv.scala @@ -6,6 +6,8 @@ import nl.lumc.sasc.biopet.core.ToolCommand import nl.lumc.sasc.biopet.core.summary.Summary /** + * This is a tool to extract values from a summary to a tsv file + * * Created by pjvan_thof on 4/23/15.
*/ object SummaryToTsv extends ToolCommand { @@ -15,16 +17,16 @@ object SummaryToTsv extends ToolCommand { mode: String = "root") extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('s', "summary") required () unbounded () maxOccurs (1) valueName ("<file>") action { (x, c) => + opt[File]('s', "summary") required () unbounded () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(summary = x) } - opt[File]('o', "output") maxOccurs (1) unbounded () valueName ("<file>") action { (x, c) => + opt[File]('o', "output") maxOccurs 1 unbounded () valueName "<file>" action { (x, c) => c.copy(outputFile = Some(x)) } - opt[String]('p', "path") required () unbounded () valueName ("<value>") action { (x, c) => + opt[String]('p', "path") required () unbounded () valueName "<value>" action { (x, c) => c.copy(values = c.values ::: x :: Nil) } - opt[String]('m', "mode") maxOccurs (1) unbounded () valueName ("<root|sample|lib>") action { (x, c) => + opt[String]('m', "mode") maxOccurs 1 unbounded () valueName "<root|sample|lib>" action { (x, c) => c.copy(mode = x) } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala index 4b89bf22fbb3daad5c7ce490c44fce80ebbd5e56..5b31736e9bb1144a49b34fd949e15f185b321d55 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala @@ -269,7 +269,7 @@ object VcfFilter extends ToolCommand { def minAlternateDepth(record: VariantContext, minAlternateDepth: Int, minSamplesPass: Int = 1): Boolean = { record.getGenotypes.count(genotype => { val AD = if (genotype.hasAD) List(genotype.getAD: _*) else Nil - if (!AD.isEmpty) AD.tail.count(_ >= minAlternateDepth) > 0 else true + if (AD.nonEmpty) AD.tail.count(_ >= minAlternateDepth) > 0 else true }) >= minSamplesPass } diff --git 
a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala index 01ae908102df1aec7d92f2ed292b07bd6e7dac6e..33bbf42f209f661a74bf21882da27f01b30925b4 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala @@ -33,6 +33,8 @@ import scala.sys.process.{ Process, ProcessLogger } import scala.util.Random /** + * This tool will generate statistics from a vcf file + * * Created by pjvan_thof on 1/10/15. */ class VcfStats(val root: Configurable) extends ToolCommandFuntion with Summarizable with Reference { @@ -61,7 +63,7 @@ class VcfStats(val root: Configurable) extends ToolCommandFuntion with Summariza var allGenotypeTags = false var reference: File = _ - override def beforeGraph: Unit = { + override def beforeGraph(): Unit = { reference = referenceFasta() index = new File(input.getAbsolutePath + ".tbi") } @@ -104,7 +106,7 @@ class VcfStats(val root: Configurable) extends ToolCommandFuntion with Summariza for (s <- 1 until data(0).size) { val sample = data(0)(s) - val stats = Map("genotype" -> (for (f <- 1 until data.size) yield { + val stats = Map("genotype" -> (for (f <- 1 until data.length) yield { data(f)(0) -> data(f)(s) }).toMap) @@ -135,13 +137,13 @@ object VcfStats extends ToolCommand { /** Parsing commandline arguments */ class OptParser extends AbstractOptParser { - opt[File]('I', "inputFile") required () unbounded () valueName ("<file>") action { (x, c) => + opt[File]('I', "inputFile") required () unbounded () valueName "<file>" action { (x, c) => c.copy(inputFile = x) } - opt[File]('R', "referenceFile") required () unbounded () valueName ("<file>") action { (x, c) => + opt[File]('R', "referenceFile") required () unbounded () valueName "<file>" action { (x, c) => c.copy(referenceFile = x) } - opt[File]('o', "outputDir") required () 
unbounded () valueName ("<file>") action { (x, c) => + opt[File]('o', "outputDir") required () unbounded () valueName "<file>" action { (x, c) => c.copy(outputDir = x) } //TODO: add interval argument @@ -150,10 +152,10 @@ object VcfStats extends ToolCommand { c.copy(intervals = Some(x)) } */ - opt[String]("infoTag") unbounded () valueName ("<tag>") action { (x, c) => + opt[String]("infoTag") unbounded () valueName "<tag>" action { (x, c) => c.copy(infoTags = x :: c.infoTags) } - opt[String]("genotypeTag") unbounded () valueName ("<tag>") action { (x, c) => + opt[String]("genotypeTag") unbounded () valueName "<tag>" action { (x, c) => c.copy(genotypeTags = x :: c.genotypeTags) } opt[Unit]("allInfoTags") unbounded () action { (x, c) => @@ -195,8 +197,8 @@ object VcfStats extends ToolCommand { * @param genotypeStats Stores all genotype relative stats * @param sampleToSample Stores sample to sample compare stats */ - case class SampleStats(val genotypeStats: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]] = mutable.Map(), - val sampleToSample: mutable.Map[String, SampleToSampleStats] = mutable.Map()) { + case class SampleStats(genotypeStats: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]] = mutable.Map(), + sampleToSample: mutable.Map[String, SampleToSampleStats] = mutable.Map()) { /** Add an other class */ def +=(other: SampleStats): Unit = { for ((key, value) <- other.sampleToSample) { @@ -217,8 +219,8 @@ object VcfStats extends ToolCommand { * @param generalStats Stores are general stats * @param samplesStats Stores all sample/genotype specific stats */ - case class Stats(val generalStats: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]] = mutable.Map(), - val samplesStats: mutable.Map[String, SampleStats] = mutable.Map()) { + case class Stats(generalStats: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]] = mutable.Map(), + samplesStats: mutable.Map[String, SampleStats] = mutable.Map()) { /** Add an other 
class */ def +=(other: Stats): Stats = { for ((key, value) <- other.samplesStats) { @@ -281,24 +283,24 @@ object VcfStats extends ToolCommand { val adInfoTags = { (for ( - infoTag <- commandArgs.infoTags if !defaultInfoFields.exists(_ == infoTag) + infoTag <- commandArgs.infoTags if !defaultInfoFields.contains(infoTag) ) yield { require(header.getInfoHeaderLine(infoTag) != null, "Info tag '" + infoTag + "' not found in header of vcf file") infoTag }) ::: (for ( - line <- header.getInfoHeaderLines if commandArgs.allInfoTags if !defaultInfoFields.exists(_ == line.getID) if !commandArgs.infoTags.exists(_ == line.getID) + line <- header.getInfoHeaderLines if commandArgs.allInfoTags if !defaultInfoFields.contains(line.getID) if !commandArgs.infoTags.contains(line.getID) ) yield { line.getID }).toList ::: defaultInfoFields } val adGenotypeTags = (for ( - genotypeTag <- commandArgs.genotypeTags if !defaultGenotypeFields.exists(_ == genotypeTag) + genotypeTag <- commandArgs.genotypeTags if !defaultGenotypeFields.contains(genotypeTag) ) yield { require(header.getFormatHeaderLine(genotypeTag) != null, "Format tag '" + genotypeTag + "' not found in header of vcf file") genotypeTag }) ::: (for ( - line <- header.getFormatHeaderLines if commandArgs.allGenotypeTags if !defaultGenotypeFields.exists(_ == line.getID) if !commandArgs.genotypeTags.exists(_ == line.getID) if line.getID != "PL" + line <- header.getFormatHeaderLines if commandArgs.allGenotypeTags if !defaultGenotypeFields.contains(line.getID) if !commandArgs.genotypeTags.contains(line.getID) if line.getID != "PL" ) yield { line.getID }).toList ::: defaultGenotypeFields @@ -356,8 +358,7 @@ object VcfStats extends ToolCommand { logger.info("Starting on: " + interval) for ( - record <- reader.query(interval.getSequence, interval.getStart, interval.getEnd); - if record.getStart <= interval.getEnd + record <- reader.query(interval.getSequence, interval.getStart, interval.getEnd) if record.getStart <= interval.getEnd ) { 
mergeNestedStatsMap(stats.generalStats, checkGeneral(record, adInfoTags)) for (sample1 <- samples) yield { @@ -394,7 +395,7 @@ object VcfStats extends ToolCommand { // Writing info fields to tsv files val infoOutputDir = new File(commandArgs.outputDir, "infotags") writeField(stats, "general", commandArgs.outputDir) - for (field <- (adInfoTags).distinct.par) { + for (field <- adInfoTags.distinct.par) { writeField(stats, field, infoOutputDir) for (line <- referenceFile.getSequenceDictionary.getSequences) { val chr = line.getSequenceName @@ -405,7 +406,7 @@ object VcfStats extends ToolCommand { // Write genotype field to tsv files val genotypeOutputDir = new File(commandArgs.outputDir, "genotypetags") writeGenotypeField(stats, samples, "general", commandArgs.outputDir, prefix = "genotype") - for (field <- (adGenotypeTags).distinct.par) { + for (field <- adGenotypeTags.distinct.par) { writeGenotypeField(stats, samples, field, genotypeOutputDir) for (line <- referenceFile.getSequenceDictionary.getSequences) { val chr = line.getSequenceName @@ -422,13 +423,13 @@ object VcfStats extends ToolCommand { // Write general wiggle tracks for (field <- commandArgs.generalWiggle) { val file = new File(commandArgs.outputDir, "wigs" + File.separator + "general-" + field + ".wig") - writeWiggle(intervals, field, "count", file, false) + writeWiggle(intervals, field, "count", file, genotype = false) } // Write sample wiggle tracks for (field <- commandArgs.genotypeWiggle; sample <- samples) { val file = new File(commandArgs.outputDir, "wigs" + File.separator + "genotype-" + sample + "-" + field + ".wig") - writeWiggle(intervals, field, sample, file, true) + writeWiggle(intervals, field, sample, file, genotype = true) } writeOverlap(stats, _.genotypeOverlap, commandArgs.outputDir + "/sample_compare/genotype_overlap", samples) @@ -474,13 +475,7 @@ object VcfStats extends ToolCommand { value.collect { case x => x.split("\t")(index) } } - /** - * Give back the number of alleles that 
overlap - * @param g1 - * @param g2 - * @param start start always at 0 - * @return - */ + /** Give back the number of alleles that overlap */ def alleleOverlap(g1: List[Allele], g2: List[Allele], start: Int = 0): Int = { if (g1.isEmpty) start else { @@ -501,25 +496,25 @@ object VcfStats extends ToolCommand { def addToBuffer(key: String, value: Any, found: Boolean): Unit = { val map = buffer.getOrElse(key, Map()) if (found) buffer += key -> (map + (value -> (map.getOrElse(value, 0) + 1))) - else buffer += key -> (map + (value -> (map.getOrElse(value, 0)))) + else buffer += key -> (map + (value -> map.getOrElse(value, 0))) } buffer += "QUAL" -> Map(record.getPhredScaledQual -> 1) - addToBuffer("SampleDistribution-Het", record.getGenotypes.count(genotype => genotype.isHet), true) - addToBuffer("SampleDistribution-HetNonRef", record.getGenotypes.count(genotype => genotype.isHetNonRef), true) - addToBuffer("SampleDistribution-Hom", record.getGenotypes.count(genotype => genotype.isHom), true) - addToBuffer("SampleDistribution-HomRef", record.getGenotypes.count(genotype => genotype.isHomRef), true) - addToBuffer("SampleDistribution-HomVar", record.getGenotypes.count(genotype => genotype.isHomVar), true) - addToBuffer("SampleDistribution-Mixed", record.getGenotypes.count(genotype => genotype.isMixed), true) - addToBuffer("SampleDistribution-NoCall", record.getGenotypes.count(genotype => genotype.isNoCall), true) - addToBuffer("SampleDistribution-NonInformative", record.getGenotypes.count(genotype => genotype.isNonInformative), true) - addToBuffer("SampleDistribution-Available", record.getGenotypes.count(genotype => genotype.isAvailable), true) - addToBuffer("SampleDistribution-Called", record.getGenotypes.count(genotype => genotype.isCalled), true) - addToBuffer("SampleDistribution-Filtered", record.getGenotypes.count(genotype => genotype.isFiltered), true) - addToBuffer("SampleDistribution-Variant", record.getGenotypes.count(genotype => genotype.isHetNonRef || 
genotype.isHet || genotype.isHomVar), true) - - addToBuffer("general", "Total", true) + addToBuffer("SampleDistribution-Het", record.getGenotypes.count(genotype => genotype.isHet), found = true) + addToBuffer("SampleDistribution-HetNonRef", record.getGenotypes.count(genotype => genotype.isHetNonRef), found = true) + addToBuffer("SampleDistribution-Hom", record.getGenotypes.count(genotype => genotype.isHom), found = true) + addToBuffer("SampleDistribution-HomRef", record.getGenotypes.count(genotype => genotype.isHomRef), found = true) + addToBuffer("SampleDistribution-HomVar", record.getGenotypes.count(genotype => genotype.isHomVar), found = true) + addToBuffer("SampleDistribution-Mixed", record.getGenotypes.count(genotype => genotype.isMixed), found = true) + addToBuffer("SampleDistribution-NoCall", record.getGenotypes.count(genotype => genotype.isNoCall), found = true) + addToBuffer("SampleDistribution-NonInformative", record.getGenotypes.count(genotype => genotype.isNonInformative), found = true) + addToBuffer("SampleDistribution-Available", record.getGenotypes.count(genotype => genotype.isAvailable), found = true) + addToBuffer("SampleDistribution-Called", record.getGenotypes.count(genotype => genotype.isCalled), found = true) + addToBuffer("SampleDistribution-Filtered", record.getGenotypes.count(genotype => genotype.isFiltered), found = true) + addToBuffer("SampleDistribution-Variant", record.getGenotypes.count(genotype => genotype.isHetNonRef || genotype.isHet || genotype.isHomVar), found = true) + + addToBuffer("general", "Total", found = true) addToBuffer("general", "Biallelic", record.isBiallelic) addToBuffer("general", "ComplexIndel", record.isComplexIndel) addToBuffer("general", "Filtered", record.isFiltered) @@ -543,8 +538,8 @@ object VcfStats extends ToolCommand { for (tag <- additionalTags if !skipTags.contains(tag)) { val value = record.getAttribute(tag) - if (value == null) addToBuffer(tag, "notset", true) - else addToBuffer(tag, value, true) + if 
(value == null) addToBuffer(tag, "notset", found = true) + else addToBuffer(tag, value, found = true) } Map(record.getChr -> buffer.toMap, "total" -> buffer.toMap) @@ -557,7 +552,7 @@ object VcfStats extends ToolCommand { def addToBuffer(key: String, value: Any, found: Boolean): Unit = { val map = buffer.getOrElse(key, Map()) if (found) buffer += key -> (map + (value -> (map.getOrElse(value, 0) + 1))) - else buffer += key -> (map + (value -> (map.getOrElse(value, 0)))) + else buffer += key -> (map + (value -> map.getOrElse(value, 0))) } buffer += "DP" -> Map((if (genotype.hasDP) genotype.getDP else "not set") -> 1) @@ -565,7 +560,7 @@ object VcfStats extends ToolCommand { val usedAlleles = (for (allele <- genotype.getAlleles) yield record.getAlleleIndex(allele)).toList - addToBuffer("general", "Total", true) + addToBuffer("general", "Total", found = true) addToBuffer("general", "Het", genotype.isHet) addToBuffer("general", "HetNonRef", genotype.isHetNonRef) addToBuffer("general", "Hom", genotype.isHom) @@ -582,11 +577,11 @@ object VcfStats extends ToolCommand { if (genotype.hasAD) { val ad = genotype.getAD for (i <- 0 until ad.size if ad(i) > 0) { - addToBuffer("AD", ad(i), true) - if (i == 0) addToBuffer("AD-ref", ad(i), true) - if (i > 0) addToBuffer("AD-alt", ad(i), true) - if (usedAlleles.exists(_ == i)) addToBuffer("AD-used", ad(i), true) - else addToBuffer("AD-not_used", ad(i), true) + addToBuffer("AD", ad(i), found = true) + if (i == 0) addToBuffer("AD-ref", ad(i), found = true) + if (i > 0) addToBuffer("AD-alt", ad(i), found = true) + if (usedAlleles.contains(i)) addToBuffer("AD-used", ad(i), found = true) + else addToBuffer("AD-not_used", ad(i), found = true) } } @@ -594,8 +589,8 @@ object VcfStats extends ToolCommand { for (tag <- additionalTags if !skipTags.contains(tag)) { val value = genotype.getAnyAttribute(tag) - if (value == null) addToBuffer(tag, "notset", true) - else addToBuffer(tag, value, true) + if (value == null) addToBuffer(tag, "notset", 
found = true) + else addToBuffer(tag, value, found = true) } Map(record.getChr -> buffer.toMap, "total" -> buffer.toMap) @@ -615,7 +610,7 @@ object VcfStats extends ToolCommand { val writer = new PrintWriter(file) writer.println(samples.mkString(field + "\t", "\t", "")) val keySet = (for (sample <- samples) yield stats.samplesStats(sample).genotypeStats.getOrElse(chr, Map[String, Map[Any, Int]]()).getOrElse(field, Map[Any, Int]()).keySet).fold(Set[Any]())(_ ++ _) - for (key <- keySet.toList.sortWith(sortAnyAny(_, _))) { + for (key <- keySet.toList.sortWith(sortAnyAny)) { val values = for (sample <- samples) yield stats.samplesStats(sample).genotypeStats.getOrElse(chr, Map[String, Map[Any, Int]]()).getOrElse(field, Map[Any, Int]()).getOrElse(key, 0) writer.println(values.mkString(key + "\t", "\t", "")) } @@ -639,7 +634,7 @@ object VcfStats extends ToolCommand { file.getParentFile.mkdirs() val writer = new PrintWriter(file) writer.println("value\tcount") - for (key <- data.keySet.toList.sortWith(sortAnyAny(_, _))) { + for (key <- data.keySet.toList.sortWith(sortAnyAny)) { writer.println(key + "\t" + data(key)) } writer.close() @@ -649,24 +644,17 @@ object VcfStats extends ToolCommand { /** Function to sort Any values */ def sortAnyAny(a: Any, b: Any): Boolean = { a match { - case ai: Int => { + case ai: Int => b match { case bi: Int => ai < bi case bi: Double => ai < bi case _ => a.toString < b.toString } - } case _ => a.toString < b.toString } } - /** - * Function to write sample to sample compare tsv's / heatmaps - * @param stats - * @param function function to extract targeted var in SampleToSampleStats - * @param prefix - * @param samples - */ + /** Function to write sample to sample compare tsv's / heatmaps */ def writeOverlap(stats: Stats, function: SampleToSampleStats => Int, prefix: String, samples: List[String]): Unit = { val absFile = new File(prefix + ".abs.tsv") diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfToTsv.scala 
b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfToTsv.scala index 0783e65ff63abb2138c186b19f3d666b3957d899..ab1f9d8cb2d1845efc3dbe3df52a419283b62e6a 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfToTsv.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfToTsv.scala @@ -22,6 +22,7 @@ import htsjdk.variant.vcf.VCFFileReader import nl.lumc.sasc.biopet.core.ToolCommand import scala.collection.JavaConversions._ +import scala.collection.mutable import scala.collection.mutable.{ ListBuffer, Map } class VcfToTsv { @@ -35,12 +36,12 @@ object VcfToTsv extends ToolCommand { separator: String = "\t", listSeparator: String = ",", maxDecimals: Int = 2) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "inputFile") required () maxOccurs (1) valueName ("<file>") action { (x, c) => + opt[File]('I', "inputFile") required () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(inputFile = x) } - opt[File]('o', "outputFile") maxOccurs (1) valueName ("<file>") action { (x, c) => + opt[File]('o', "outputFile") maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(outputFile = x) - } text ("output file, default to stdout") + } text "output file, default to stdout" opt[String]('f', "field") unbounded () action { (x, c) => c.copy(fields = x :: c.fields) } @@ -59,15 +60,15 @@ object VcfToTsv extends ToolCommand { opt[Unit]('d', "disable_defaults") unbounded () action { (x, c) => c.copy(disableDefaults = true) } - opt[String]("separator") maxOccurs (1) action { (x, c) => + opt[String]("separator") maxOccurs 1 action { (x, c) => c.copy(separator = x) - } text ("Optional separator. Default is tab-delimited") - opt[String]("list_separator") maxOccurs (1) action { (x, c) => + } text "Optional separator. Default is tab-delimited" + opt[String]("list_separator") maxOccurs 1 action { (x, c) => c.copy(listSeparator = x) - } text ("Optional list separator. 
By default, lists are separated by a comma") - opt[Int]("max_decimals") maxOccurs (1) action { (x, c) => + } text "Optional list separator. By default, lists are separated by a comma" + opt[Int]("max_decimals") maxOccurs 1 action { (x, c) => c.copy(maxDecimals = x) - } text ("Number of decimal places for numbers. Default is 2") + } text "Number of decimal places for numbers. Default is 2" } val defaultFields = List("CHROM", "POS", "ID", "REF", "ALT", "QUAL") @@ -94,7 +95,7 @@ object VcfToTsv extends ToolCommand { commandArgs.fields.toSet[String] ++ (if (commandArgs.allInfo) allInfoFields else commandArgs.infoFields).map("INFO-" + _) ++ { val buffer: ListBuffer[String] = ListBuffer() - for (f <- (if (commandArgs.allFormat) allFormatFields else commandArgs.sampleFields); sample <- samples) { + for (f <- if (commandArgs.allFormat) allFormatFields else commandArgs.sampleFields; sample <- samples) { buffer += sample + "-" + f } buffer.toSet[String] @@ -107,7 +108,7 @@ object VcfToTsv extends ToolCommand { writer.println(sortedFields.mkString("#", commandArgs.separator, "")) for (vcfRecord <- reader) { - val values: Map[String, Any] = Map() + val values: mutable.Map[String, Any] = mutable.Map() values += "CHROM" -> vcfRecord.getChr values += "POS" -> vcfRecord.getStart values += "ID" -> vcfRecord.getID @@ -158,7 +159,7 @@ object VcfToTsv extends ToolCommand { * @return DecimalFormat formatter */ def createFormatter(len: Int): DecimalFormat = { - val patternString = "###." + (for (x <- (1 to len)) yield "#").mkString("") + val patternString = "###." 
+ (for (x <- 1 to len) yield "#").mkString("") new DecimalFormat(patternString) } @@ -173,31 +174,29 @@ object VcfToTsv extends ToolCommand { def sortFields(fields: Set[String], samples: List[String]): List[String] = { def fieldType(x: String) = x match { case _ if x.startsWith("INFO-") => 'i' - case _ if (samples.exists(y => x.startsWith(y + "-"))) => 'f' + case _ if samples.exists(y => x.startsWith(y + "-")) => 'f' case _ => 'g' } fields.toList.sortWith((a, b) => { (fieldType(a), fieldType(b)) match { - case ('g', 'g') => { + case ('g', 'g') => val ai = defaultFields.indexOf(a) val bi = defaultFields.indexOf(b) if (bi < 0) true else ai <= bi - } - case ('f', 'f') => { + case ('f', 'f') => val sampleA = a.split("-").head val sampleB = b.split("-").head sampleA.compareTo(sampleB) match { - case 0 => !(a.compareTo(b) > 0) - case i if (i > 0) => false - case _ => true + case 0 => !(a.compareTo(b) > 0) + case i if i > 0 => false + case _ => true } - } - case ('g', _) => true - case (_, 'g') => false - case (a, b) if a == b => !(a.compareTo(b) > 0) - case ('i', _) => true - case _ => false + case ('g', _) => true + case (_, 'g') => false + case (a2, b2) if a2 == b2 => !(a2.compareTo(b2) > 0) + case ('i', _) => true + case _ => false } }) } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala index 94bb8fa20078238839953c7a119beaebc0611a51..f78dd804028168f04980fa9ba8ec9d925ab3ee2d 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala @@ -10,6 +10,8 @@ import nl.lumc.sasc.biopet.core.ToolCommand import scala.collection.JavaConversions._ /** + * This is a tool to annotate a vcf file with info value from a other vcf file + * * Created by ahbbollen on 11-2-15. 
*/ object VcfWithVcf extends ToolCommand { @@ -26,30 +28,29 @@ object VcfWithVcf extends ToolCommand { } class OptParser extends AbstractOptParser { - opt[File]('I', "inputFile") required () maxOccurs (1) valueName ("<file>") action { (x, c) => + opt[File]('I', "inputFile") required () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(inputFile = x) } - opt[File]('O', "outputFile") required () maxOccurs (1) valueName ("<file>") action { (x, c) => + opt[File]('O', "outputFile") required () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(outputFile = x) } - opt[File]('S', "secondaryVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) => + opt[File]('S', "secondaryVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(secondaryVcf = x) } - opt[String]('f', "field") unbounded () valueName ("<field> or <input_field:output_field> or <input_field:output_field:method>") action { (x, c) => + opt[String]('f', "field") unbounded () valueName "<field> or <input_field:output_field> or <input_field:output_field:method>" action { (x, c) => val values = x.split(":") if (values.size > 2) c.copy(fields = Fields(values(0), values(1), FieldMethod.withName(values(2))) :: c.fields) else if (values.size > 1) c.copy(fields = Fields(values(0), values(1)) :: c.fields) else c.copy(fields = Fields(x, x) :: c.fields) - } text ("""| If only <field> is given, the field's identifier in the output VCF will be identical to <field>. - | By default we will return all values found for a given field. 
- | With <method> the values will processed after getting it from the secondary VCF file, posible methods are: - | - max : takes maximum of found value, only works for numeric (integer/float) fields - | - min : takes minemal of found value, only works for numeric (integer/float) fields - | - unique: takes only unique values """.stripMargin - ) - opt[Boolean]("match") valueName ("<Boolean>") maxOccurs (1) action { (x, c) => + } text """| If only <field> is given, the field's identifier in the output VCF will be identical to <field>. + | By default we will return all values found for a given field. + | With <method> the values will processed after getting it from the secondary VCF file, posible methods are: + | - max : takes maximum of found value, only works for numeric (integer/float) fields + | - min : takes minemal of found value, only works for numeric (integer/float) fields + | - unique: takes only unique values """.stripMargin + opt[Boolean]("match") valueName "<Boolean>" maxOccurs 1 action { (x, c) => c.copy(matchAllele = x) - } text ("Match alternative alleles; default true") + } text "Match alternative alleles; default true" } def main(args: Array[String]): Unit = { @@ -88,18 +89,16 @@ object VcfWithVcf extends ToolCommand { for (record <- reader) { val secondaryRecords = if (commandArgs.matchAllele) { secondaryReader.query(record.getChr, record.getStart, record.getEnd).toList. 
- filter(x => record.getAlternateAlleles.exists(x.hasAlternateAllele(_))) + filter(x => record.getAlternateAlleles.exists(x.hasAlternateAllele)) } else { secondaryReader.query(record.getChr, record.getStart, record.getEnd).toList } val fieldMap = (for ( - f <- commandArgs.fields; - if secondaryRecords.exists(_.hasAttribute(f.inputField)) + f <- commandArgs.fields if secondaryRecords.exists(_.hasAttribute(f.inputField)) ) yield { f.outputField -> (for ( - secondRecord <- secondaryRecords; - if (secondRecord.hasAttribute(f.inputField)) + secondRecord <- secondaryRecords if secondRecord.hasAttribute(f.inputField) ) yield { secondRecord.getAttribute(f.inputField) match { case l: List[_] => l @@ -110,20 +109,18 @@ object VcfWithVcf extends ToolCommand { writer.add(fieldMap.foldLeft(new VariantContextBuilder(record))((builder, attribute) => { builder.attribute(attribute._1, commandArgs.fields.filter(_.outputField == attribute._1).head.fieldMethod match { - case FieldMethod.max => { + case FieldMethod.max => header.getInfoHeaderLine(attribute._1).getType match { case VCFHeaderLineType.Integer => Array(attribute._2.map(_.toString.toInt).max) case VCFHeaderLineType.Float => Array(attribute._2.map(_.toString.toFloat).max) case _ => throw new IllegalArgumentException("Type of field " + attribute._1 + " is not numeric") } - } - case FieldMethod.min => { + case FieldMethod.min => header.getInfoHeaderLine(attribute._1).getType match { case VCFHeaderLineType.Integer => Array(attribute._2.map(_.toString.toInt).min) case VCFHeaderLineType.Float => Array(attribute._2.map(_.toString.toFloat).min) case _ => throw new IllegalArgumentException("Type of field " + attribute._1 + " is not numeric") } - } case FieldMethod.unique => attribute._2.distinct.toArray case _ => attribute._2.toArray }) diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala index 
c1c090e26aaf24861d009150dcb426a2d9b5675b..b953184bd7db2c67faaa85cc01cb9d5968e77d7c 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala @@ -191,7 +191,7 @@ object VepNormalizer extends ToolCommand { def explodeTranscripts(record: VariantContext, csqInfos: Array[String], removeCsq: Boolean): Array[VariantContext] = { for (transcript <- parseCsq(record)) yield { (for ( - fieldId <- 0 until csqInfos.size if transcript.isDefinedAt(fieldId); + fieldId <- csqInfos.indices if transcript.isDefinedAt(fieldId); value = transcript(fieldId) if value.nonEmpty ) yield csqInfos(fieldId) -> value) .filterNot(_._2.isEmpty) @@ -203,7 +203,7 @@ object VepNormalizer extends ToolCommand { def standardTranscripts(record: VariantContext, csqInfos: Array[String], removeCsq: Boolean): VariantContext = { val attribs = parseCsq(record) - (for (fieldId <- 0 until csqInfos.size) yield csqInfos(fieldId) -> { + (for (fieldId <- csqInfos.indices) yield csqInfos(fieldId) -> { for ( transcript <- attribs if transcript.isDefinedAt(fieldId); value = transcript(fieldId) if value.nonEmpty diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/WipeReads.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/WipeReads.scala index d047b3d8ba997e6bcab52c583ae5d003310cb641..4060560236166e49d80d104b09a682d116afb16f 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/WipeReads.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/WipeReads.scala @@ -257,7 +257,7 @@ object WipeReads extends ToolCommand { /** filter function for read IDs */ val rgFilter = - if (readGroupIds.size == 0) + if (readGroupIds.isEmpty) (r: SAMRecord) => true else (r: SAMRecord) => readGroupIds.contains(r.getReadGroup.getReadGroupId) diff --git 
a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala index 4e08cfecb11fdaba1b845de0bd3a1fe40eb3121d..12167b9b9a60fa379b100d8b1280d8fa0fc8e990 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala @@ -44,17 +44,16 @@ object ConfigUtils extends Logging { else if (!map1.contains(key)) newMap += (key -> map2(key)) else { map1(key) match { - case m1: Map[_, _] => { + case m1: Map[_, _] => map2(key) match { case m2: Map[_, _] => newMap += (key -> mergeMaps(any2map(m1), any2map(m2))) case _ => newMap += (key -> map1(key)) } - } case _ => newMap += (key -> resolveConflict(map1(key), map2(key), key)) } } } - return newMap + newMap } /** @@ -79,7 +78,7 @@ object ConfigUtils extends Logging { */ def getValueFromPath(map: Map[String, Any], path: List[String]): Option[Any] = { val value = map.get(path.head) - if (path.tail == Nil || value == None) value + if (path.tail.isEmpty || value.isEmpty) value else value.get match { case map: Map[_, _] => getValueFromPath(map.asInstanceOf[Map[String, Any]], path.tail) case map: java.util.LinkedHashMap[_, _] => getValueFromPath(map.toMap.asInstanceOf[Map[String, Any]], path.tail) @@ -107,7 +106,7 @@ object ConfigUtils extends Logging { else jsonToMap(fileToJson(configFile)) } logger.debug("Contain: " + configMap) - return configMap + configMap } /** Convert a yaml file to map[String, Any] */ @@ -126,23 +125,23 @@ object ConfigUtils extends Logging { output += (key -> value) } } else throw new IllegalStateException("Given value is no json object: " + json) - return output + output } /** Convert json value to native scala value */ def jsonToAny(json: Json): Any = { - if (json.isObject) return jsonToMap(json) + if (json.isObject) jsonToMap(json) else if (json.isArray) { var list: List[Any] = List() 
for (value <- json.array.get) list ::= jsonToAny(value) - return list.reverse - } else if (json.isBool) return json.bool.get - else if (json.isString) return json.string.get.toString + list.reverse + } else if (json.isBool) json.bool.get + else if (json.isString) json.string.get.toString else if (json.isNumber) { val num = json.number.get - if (num % 1 > 0) return num.toDouble - else return num.toLong - } else if (json.isNull) return None + if (num % 1 > 0) num.toDouble + else num.toLong + } else if (json.isNull) None else throw new IllegalStateException("Config value type not supported, value: " + json) } @@ -163,7 +162,7 @@ object ConfigUtils extends Logging { case None => Json.jNull case Some(x) => anyToJson(x) case m: Map[_, _] => mapToJson(m.map(m => m._1.toString -> anyToJson(m._2))) - case l: List[_] => Json.array(l.map(anyToJson(_)): _*) + case l: List[_] => Json.array(l.map(anyToJson): _*) case b: Boolean => Json.jBool(b) case n: Int => Json.jNumberOrString(n) case n: Double => Json.jNumberOrString(n) @@ -190,10 +189,9 @@ object ConfigUtils extends Logging { case i: Int => i case i: Double => i.toInt case i: Long => i.toInt - case i: String => { + case i: String => logger.warn("Value '" + any + "' is a string insteadof int in json file, trying auto convert") i.toInt - } case _ => throw new IllegalStateException("Value '" + any + "' is not an int") } } @@ -204,10 +202,9 @@ object ConfigUtils extends Logging { case l: Double => l.toLong case l: Int => l.toLong case l: Long => l - case l: String => { + case l: String => logger.warn("Value '" + any + "' is a string insteadof int in json file, trying auto convert") l.toLong - } case _ => throw new IllegalStateException("Value '" + any + "' is not an int") } } @@ -219,10 +216,9 @@ object ConfigUtils extends Logging { case d: Float => d.toDouble case d: Int => d.toDouble case f: Long => f.toDouble - case d: String => { + case d: String => logger.warn("Value '" + any + "' is a string insteadof int in json file, 
trying auto convert") - return d.toDouble - } + d.toDouble case _ => throw new IllegalStateException("Value '" + any + "' is not an number") } } @@ -234,10 +230,9 @@ object ConfigUtils extends Logging { case f: Int => f.toFloat case f: Long => f.toFloat case f: Float => f - case f: String => { + case f: String => logger.warn("Value '" + any + "' is a string insteadof int in json file, trying auto convert") f.toFloat - } case _ => throw new IllegalStateException("Value '" + any + "' is not an number") } } @@ -246,14 +241,12 @@ object ConfigUtils extends Logging { def any2boolean(any: Any): Boolean = { any match { case b: Boolean => b - case b: String => { + case b: String => logger.warn("Value '" + any + "' is a string insteadof boolean in json file, trying auto convert") b.contains("true") - } - case b: Int => { + case b: Int => logger.warn("Value '" + any + "' is a int insteadof boolean in json file, trying auto convert") - (b > 0) - } + b > 0 case _ => throw new IllegalStateException("Value '" + any + "' is not an boolean") } } @@ -308,7 +301,7 @@ object ConfigUtils extends Logging { if (!exist) BiopetQScript.addError("Value does not exist but is required, key: " + value.requestIndex.key + " module: " + value.requestIndex.module, - (if (value.requestIndex.path != Nil) " path: " + value.requestIndex.path.mkString("->") else null)) + if (value.requestIndex.path != Nil) " path: " + value.requestIndex.path.mkString("->") else null) exist } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/IoUtils.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/IoUtils.scala index 6333763bb35421ebace6a758cddb705f90ab64ef..49bf3fe86cd5664aea9e96000ebd2291ed5fa9b2 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/IoUtils.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/IoUtils.scala @@ -3,6 +3,8 @@ package nl.lumc.sasc.biopet.utils import java.io.{ File, FileInputStream, 
FileOutputStream, InputStream } /** + * This object contains generic io methods + * * Created by pjvan_thof on 6/4/15. */ object IoUtils { diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala index 886d3985a17f5454c83cb73c48ef1190162d132d..9074ff1f3bcc71fda5ad5c8aa9ba5ae5867fc6c8 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala @@ -41,6 +41,6 @@ object VcfUtils { * @return */ def fillAllele(bases: String, newSize: Int, fillWith: Char = '-'): String = { - bases + (Array.fill[Char](newSize - bases.size)(fillWith)).mkString + bases + Array.fill[Char](newSize - bases.length)(fillWith).mkString } }