Commit e5bbde4d authored by rhpvorderman

Formatted scala files with scalafmt

parent 1e95dc46
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.bamtobigwig
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
import nl.lumc.sasc.biopet.core.{BiopetQScript, PipelineCommand}
import nl.lumc.sasc.biopet.extensions.WigToBigWig
import nl.lumc.sasc.biopet.extensions.igvtools.IGVToolsCount
import org.broadinstitute.gatk.queue.QScript
/**
* Pipeline to create BigWig files from a bam file
*
* Created by pjvan_thof on 1/29/15.
*/
* Pipeline to create BigWig files from a bam file
*
* Created by pjvan_thof on 1/29/15.
*/
class Bam2Wig(val parent: Configurable) extends QScript with BiopetQScript {
/** Auxiliary no-argument constructor; delegates to the primary constructor with a `null` parent. */
def this() = this(null)
......@@ -70,4 +70,4 @@ object Bam2Wig extends PipelineCommand {
bamToBigWig.bamFile = bamFile
bamToBigWig
}
}
\ No newline at end of file
}
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.bamtobigwig
import java.io.{ File, PrintWriter }
import java.io.{File, PrintWriter}
import htsjdk.samtools.SamReaderFactory
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.function.InProcessFunction
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
import scala.collection.JavaConversions._
/**
* Class to extract chrom.sizes files from a bam file
*
* Created by pjvan_thof on 1/29/15.
*/
* Class to extract chrom.sizes files from a bam file
*
* Created by pjvan_thof on 1/29/15.
*/
class BamToChromSizes(val parent: Configurable) extends InProcessFunction with Configurable {
@Input
var bamFile: File = _
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.bamtobigwig
import java.io.File
......@@ -24,8 +24,8 @@ import org.testng.annotations.Test
import scala.io.Source
/**
* Created by pjvanthof on 09/05/16.
*/
* Created by pjvanthof on 09/05/16.
*/
class BamToChromSizesTest extends TestNGSuite with Matchers {
private def resourcePath(p: String): String = {
Paths.get(getClass.getResource(p).toURI).toString
......@@ -39,6 +39,7 @@ class BamToChromSizesTest extends TestNGSuite with Matchers {
bamToChromSizes.chromSizesFile = File.createTempFile("chrom.", ".sizes")
bamToChromSizes.chromSizesFile.deleteOnExit()
bamToChromSizes.run()
Source.fromFile(bamToChromSizes.chromSizesFile).getLines().toList shouldBe List("chrQ\t10000", "chrR\t10000")
Source.fromFile(bamToChromSizes.chromSizesFile).getLines().toList shouldBe List("chrQ\t10000",
"chrR\t10000")
}
}
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.bammetrics
import java.io.File
import nl.lumc.sasc.biopet.core.annotations.{ AnnotationRefFlat, RibosomalRefFlat }
import nl.lumc.sasc.biopet.core.annotations.{AnnotationRefFlat, RibosomalRefFlat}
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, Reference, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsCoverage, BedtoolsIntersect, BedtoolsSort }
import nl.lumc.sasc.biopet.core.{BiopetFifoPipe, PipelineCommand, Reference, SampleLibraryTag}
import nl.lumc.sasc.biopet.extensions.bedtools.{BedtoolsCoverage, BedtoolsIntersect, BedtoolsSort}
import nl.lumc.sasc.biopet.extensions.picard._
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat
import nl.lumc.sasc.biopet.extensions.tools.{ BamStats, BiopetFlagstat }
import nl.lumc.sasc.biopet.extensions.tools.{BamStats, BiopetFlagstat}
import nl.lumc.sasc.biopet.pipelines.bammetrics.scripts.CoverageStats
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.intervals.BedCheck
import org.broadinstitute.gatk.queue.QScript
class BamMetrics(val parent: Configurable) extends QScript
with SummaryQScript
with SampleLibraryTag
with Reference
with TargetRegions
with AnnotationRefFlat
with RibosomalRefFlat {
class BamMetrics(val parent: Configurable)
extends QScript
with SummaryQScript
with SampleLibraryTag
with Reference
with TargetRegions
with AnnotationRefFlat
with RibosomalRefFlat {
/** Auxiliary no-argument constructor; delegates to the primary constructor with a `null` parent. */
def this() = this(null)
......@@ -44,30 +45,32 @@ class BamMetrics(val parent: Configurable) extends QScript
/** Overrides the defaults with a nested map that sets "sorted" to true under the "bedtoolscoverage" key. */
override def defaults = Map("bedtoolscoverage" -> Map("sorted" -> true))
/**
 * Returns the files to store in the summary, keyed by name.
 *
 * Always contains "reference" (the result of `referenceFasta()`) and "input_bam" (`inputBam`).
 * When `ampliconBedFile` holds a value, that file is added under "amplicon" and again under
 * "roi_" followed by its file name without the ".bed" suffix.
 *
 * NOTE(review): the "roi_" entry is derived from `ampliconBedFile`, not from `roiBedFiles`;
 * confirm this is intended.
 */
def summaryFiles = Map("reference" -> referenceFasta(),
"input_bam" -> inputBam) ++
ampliconBedFile.map("amplicon" -> _).toMap ++
ampliconBedFile.map(x => "roi_" + x.getName.stripSuffix(".bed") -> x).toMap
// Same definition as above, in the layout produced by the scalafmt formatting commit.
def summaryFiles =
Map("reference" -> referenceFasta(), "input_bam" -> inputBam) ++
ampliconBedFile.map("amplicon" -> _).toMap ++
ampliconBedFile.map(x => "roi_" + x.getName.stripSuffix(".bed") -> x).toMap
/**
 * Returns the settings to store in the summary, keyed by name.
 *
 * "amplicon_name" holds the file name of `ampliconBedFile` without the ".bed" suffix (the
 * `collect` with a catch-all case transforms every value, like a `map`), and "roi_name" holds
 * the file names of all `roiBedFiles` without the ".bed" suffix.
 */
def summarySettings = Map("amplicon_name" -> ampliconBedFile.collect { case x => x.getName.stripSuffix(".bed") },
"roi_name" -> roiBedFiles.map(_.getName.stripSuffix(".bed")))
// Same definition as above, in the layout produced by the scalafmt formatting commit.
def summarySettings =
Map("amplicon_name" -> ampliconBedFile.collect { case x => x.getName.stripSuffix(".bed") },
"roi_name" -> roiBedFiles.map(_.getName.stripSuffix(".bed")))
/**
 * Builds the report for this pipeline and returns it wrapped in `Some`.
 *
 * The report is a `BammetricsReport` constructed with this pipeline as argument. Its output
 * directory is the "report" directory under `outputDir`, and it receives this pipeline's
 * `summaryDbFile`. Its `args` always contain "sampleId" and, when `libId` is defined, also
 * "libId"; a missing id is replaced by ".".
 */
override def reportClass = {
val bammetricsReport = new BammetricsReport(this)
bammetricsReport.outputDir = new File(outputDir, "report")
bammetricsReport.summaryDbFile = summaryDbFile
// Only pass "libId" along when a library id is actually set.
bammetricsReport.args = if (libId.isDefined) Map(
"sampleId" -> sampleId.getOrElse("."),
"libId" -> libId.getOrElse("."))
else Map("sampleId" -> sampleId.getOrElse("."))
// Same assignment as above, in the layout produced by the scalafmt formatting commit.
bammetricsReport.args =
if (libId.isDefined)
Map("sampleId" -> sampleId.getOrElse("."), "libId" -> libId.getOrElse("."))
else Map("sampleId" -> sampleId.getOrElse("."))
Some(bammetricsReport)
}
/**
 * Executed before the script.
 *
 * Appends the input bam (wrapped in an `InputFile`) to `inputFiles`, then passes the amplicon
 * bed file (if any) and every ROI bed file to `BedCheck.checkBedFileToReference` together with
 * the reference fasta and `biopetError = true`.
 *
 * NOTE(review): presumably the check validates the bed regions against the reference and
 * reports mismatches as Biopet errors; confirm against `BedCheck`.
 */
def init(): Unit = {
inputFiles :+= new InputFile(inputBam)
ampliconBedFile.foreach(BedCheck.checkBedFileToReference(_, referenceFasta(), biopetError = true))
// Same amplicon check as above, in the layout produced by the scalafmt formatting commit.
ampliconBedFile.foreach(
BedCheck.checkBedFileToReference(_, referenceFasta(), biopetError = true))
roiBedFiles.foreach(BedCheck.checkBedFileToReference(_, referenceFasta(), biopetError = true))
}
......@@ -129,13 +132,19 @@ class BamMetrics(val parent: Configurable) extends QScript
ampBedToInterval.isIntermediate = true
add(ampBedToInterval)
val chsMetrics = CollectHsMetrics(this, inputBam,
List(ampIntervals), ampIntervals :: roiIntervals.map(_.intervals), outputDir)
val chsMetrics = CollectHsMetrics(this,
inputBam,
List(ampIntervals),
ampIntervals :: roiIntervals.map(_.intervals),
outputDir)
add(chsMetrics)
addSummarizable(chsMetrics, "hs_metrics")
val pcrMetrics = CollectTargetedPcrMetrics(this, inputBam,
ampIntervals, ampIntervals :: roiIntervals.map(_.intervals), outputDir)
val pcrMetrics = CollectTargetedPcrMetrics(this,
inputBam,
ampIntervals,
ampIntervals :: roiIntervals.map(_.intervals),
outputDir)
add(pcrMetrics)
addSummarizable(pcrMetrics, "targeted_pcr_metrics")
......@@ -145,36 +154,48 @@ class BamMetrics(val parent: Configurable) extends QScript
// Create stats and coverage plot for each bed/interval file
val allIntervalNames = (roiIntervals ++ ampIntervals).map(_.bed.getName)
if (allIntervalNames.size != allIntervalNames.toSet.size) {
logger.warn("There are multiple region files with the same name. Metric values might get overwritten")
logger.warn(
"There are multiple region files with the same name. Metric values might get overwritten")
}
for (intervals <- roiIntervals ++ ampIntervals) {
val targetName = intervals.bed.getName.stripSuffix(".bed")
val targetDir = new File(outputDir, targetName)
val biStrict = BedtoolsIntersect(this, inputBam, intervals.bed,
val biStrict = BedtoolsIntersect(
this,
inputBam,
intervals.bed,
output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.strict.sam"),
minOverlap = config("strict_intersect_overlap", default = 1.0))
minOverlap = config("strict_intersect_overlap", default = 1.0)
)
val biopetFlagstatStrict = BiopetFlagstat(this, biStrict.output, targetDir)
addSummarizable(biopetFlagstatStrict, targetName + "_flagstats_strict")
add(new BiopetFifoPipe(this, List(biStrict, biopetFlagstatStrict)))
val biLoose = BedtoolsIntersect(this, inputBam, intervals.bed,
val biLoose = BedtoolsIntersect(
this,
inputBam,
intervals.bed,
output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.loose.sam"),
minOverlap = config("loose_intersect_overlap", default = 0.01))
minOverlap = config("loose_intersect_overlap", default = 0.01)
)
val biopetFlagstatLoose = BiopetFlagstat(this, biLoose.output, targetDir)
addSummarizable(biopetFlagstatLoose, targetName + "_flagstats_loose")
add(new BiopetFifoPipe(this, List(biLoose, biopetFlagstatLoose)))
val sortedBed = BamMetrics.sortedbedCache.getOrElse(intervals.bed, {
val sorter = new BedtoolsSort(this)
sorter.input = intervals.bed
sorter.output = swapExt(targetDir, intervals.bed, ".bed", ".sorted.bed")
add(sorter)
BamMetrics.sortedbedCache += intervals.bed -> sorter.output
sorter.output
})
val sortedBed = BamMetrics.sortedbedCache.getOrElse(
intervals.bed, {
val sorter = new BedtoolsSort(this)
sorter.input = intervals.bed
sorter.output = swapExt(targetDir, intervals.bed, ".bed", ".sorted.bed")
add(sorter)
BamMetrics.sortedbedCache += intervals.bed -> sorter.output
sorter.output
}
)
val bedCov = BedtoolsCoverage(this, sortedBed, inputBam, depth = true)
val covStats = CoverageStats(this, targetDir, inputBam.getName.stripSuffix(".bam") + ".coverage")
val covStats =
CoverageStats(this, targetDir, inputBam.getName.stripSuffix(".bam") + ".coverage")
covStats.title = Some("Coverage Plot")
covStats.subTitle = Some(s"for file '$targetName.bed'")
add(bedCov | covStats)
......@@ -186,9 +207,11 @@ class BamMetrics(val parent: Configurable) extends QScript
}
object BamMetrics extends PipelineCommand {
/** Makes a default implementation of BamMetrics and already runs the script */
def apply(root: Configurable,
bamFile: File, outputDir: File,
bamFile: File,
outputDir: File,
sampleId: Option[String] = None,
libId: Option[String] = None): BamMetrics = {
val bamMetrics = new BamMetrics(root)
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.bammetrics
import java.io.File
......@@ -19,9 +19,10 @@ import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
/**
* Created by pjvan_thof on 11/20/15.
*/
* Created by pjvan_thof on 11/20/15.
*/
trait TargetRegions extends Configurable {
/** Bed files for regions of interest */
var roiBedFiles: List[File] = config("regions_of_interest", Nil)
......