diff --git a/.gitignore b/.gitignore index 77de2f25e244b88f845d817edc590690e679258e..f490a5f29f82bf37af38f4aa340165b10f96f173 100644 --- a/.gitignore +++ b/.gitignore @@ -10,7 +10,5 @@ git.properties .idea/* *.iml target/ -public/target/ -protected/target/ site/ *.sc \ No newline at end of file diff --git a/public/LICENSE b/LICENSE similarity index 100% rename from public/LICENSE rename to LICENSE diff --git a/README.md b/README.md index 1a63d56e3994f15cdea97b62577cd32ff613e365..191890bfa86a19d9c8a3a3fa6540339465d90d46 100755 --- a/README.md +++ b/README.md @@ -55,18 +55,18 @@ At the moment, we do not provide links to download the Biopet package. If you ar ## Contributing to Biopet -Biopet is based on the Queue framework developed by the Broad Institute as part of their Genome Analysis Toolkit (GATK) framework. The current Biopet release is based on the GATK 3.4 release. +Biopet is based on the Queue framework developed by the Broad Institute as part of their Genome Analysis Toolkit (GATK) framework. The current Biopet release is based on the GATK 3.5 release. We welcome any kind of contribution, be it merge requests on the code base, documentation updates, or any kinds of other fixes! The main language we use is Scala, though the repository also contains a small bit of Python and R. Our main code repository is located at [https://github.com/biopet/biopet](https://github.com/biopet/biopet/issues), along with our issue tracker. ## Local development setup -To develop Biopet, Java 7, Maven 3.2.2, and GATK Queue 3.4 is required. Please consult the Java homepage and Maven homepage for the respective installation instruction. After you have both Java and Maven installed, you would then need to install GATK Queue. However, as the GATK Queue package is not yet available as an artifact in Maven Central, you will need to download, compile, and install GATK Queue first. +To develop Biopet, Java 7, Maven 3.3.3, and GATK Queue 3.5 is required. 
Please consult the Java homepage and Maven homepage for the respective installation instructions. After you have both Java and Maven installed, you would then need to install GATK Queue. However, as the GATK Queue package is not yet available as an artifact in Maven Central, you will need to download, compile, and install GATK Queue first. ~~~ $ git clone https://github.com/broadgsa/gatk-protected -$ cd gatk-protected -$ git checkout 3.4 # the current release is based on GATK 3.4 +$ cd gatk-protected +$ git checkout 3.5 # the current release is based on GATK 3.5 $ mvn -U clean install ~~~ diff --git a/protected/biopet-gatk-extensions/.gitignore b/bam2wig/.gitignore similarity index 100% rename from protected/biopet-gatk-extensions/.gitignore rename to bam2wig/.gitignore diff --git a/public/bam2wig/pom.xml b/bam2wig/pom.xml similarity index 100% rename from public/bam2wig/pom.xml rename to bam2wig/pom.xml diff --git a/public/bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/Bam2Wig.scala b/bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/Bam2Wig.scala similarity index 100% rename from public/bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/Bam2Wig.scala rename to bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/Bam2Wig.scala diff --git a/public/bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/BamToChromSizes.scala b/bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/BamToChromSizes.scala similarity index 100% rename from public/bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/BamToChromSizes.scala rename to bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/BamToChromSizes.scala diff --git a/public/bam2wig/src/test/resources/log4j.properties b/bam2wig/src/test/resources/log4j.properties similarity index 100% rename from public/bam2wig/src/test/resources/log4j.properties rename to bam2wig/src/test/resources/log4j.properties diff --git
a/protected/biopet-gatk-pipelines/.gitignore b/bammetrics/.gitignore similarity index 100% rename from protected/biopet-gatk-pipelines/.gitignore rename to bammetrics/.gitignore diff --git a/public/bammetrics/pom.xml b/bammetrics/pom.xml similarity index 100% rename from public/bammetrics/pom.xml rename to bammetrics/pom.xml diff --git a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp b/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp similarity index 100% rename from public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp rename to bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp diff --git a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp b/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp similarity index 100% rename from public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp rename to bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp diff --git a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/bamStats.ssp b/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/bamStats.ssp similarity index 100% rename from public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/bamStats.ssp rename to bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/bamStats.ssp diff --git a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/bammetricsInputFile.ssp b/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/bammetricsInputFile.ssp similarity index 100% rename from public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/bammetricsInputFile.ssp rename to 
bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/bammetricsInputFile.ssp diff --git a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsMultiTable.ssp b/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsMultiTable.ssp similarity index 100% rename from public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsMultiTable.ssp rename to bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsMultiTable.ssp diff --git a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp b/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp similarity index 100% rename from public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp rename to bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp diff --git a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/gcBias.ssp b/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/gcBias.ssp similarity index 100% rename from public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/gcBias.ssp rename to bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/gcBias.ssp diff --git a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp b/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp similarity index 100% rename from public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp rename to bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp diff --git a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/rna.ssp b/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/rna.ssp similarity index 100% rename 
from public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/rna.ssp rename to bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/rna.ssp diff --git a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp b/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp similarity index 100% rename from public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp rename to bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp diff --git a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/scripts/bedtools_cov_stats.py b/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/scripts/bedtools_cov_stats.py similarity index 100% rename from public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/scripts/bedtools_cov_stats.py rename to bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/scripts/bedtools_cov_stats.py diff --git a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/target.ssp b/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/target.ssp similarity index 100% rename from public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/target.ssp rename to bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/target.ssp diff --git a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp b/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp similarity index 100% rename from public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp rename to bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp diff --git 
a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala b/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala similarity index 100% rename from public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala rename to bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala diff --git a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BammetricsReport.scala b/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BammetricsReport.scala similarity index 99% rename from public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BammetricsReport.scala rename to bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BammetricsReport.scala index 5e34a2e0ca29120f9a92f2db3f16f96b80f95972..f91d49aa44ac69c331d3411af2624dc5491298c5 100644 --- a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BammetricsReport.scala +++ b/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BammetricsReport.scala @@ -98,6 +98,7 @@ object BammetricsReport extends ReportBuilder { /** * Generate a stackbar plot for alignment stats + * * @param outputDir OutputDir for the tsv and png file * @param prefix Prefix of the tsv and png file * @param summary Summary class @@ -161,6 +162,7 @@ object BammetricsReport extends ReportBuilder { /** * Generate a line plot for insertsize + * * @param outputDir OutputDir for the tsv and png file * @param prefix Prefix of the tsv and png file * @param summary Summary class @@ -249,6 +251,7 @@ object BammetricsReport extends ReportBuilder { /** * Generate a line plot for wgs coverage + * * @param outputDir OutputDir for the tsv and png file * @param prefix Prefix of the tsv and png file * @param summary Summary class @@ -339,6 +342,7 @@ object BammetricsReport extends ReportBuilder { /** * Generate a line plot for rna coverage + * * @param 
outputDir OutputDir for the tsv and png file * @param prefix Prefix of the tsv and png file * @param summary Summary class diff --git a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/TargetRegions.scala b/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/TargetRegions.scala similarity index 100% rename from public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/TargetRegions.scala rename to bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/TargetRegions.scala diff --git a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/scripts/CoverageStats.scala b/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/scripts/CoverageStats.scala similarity index 100% rename from public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/scripts/CoverageStats.scala rename to bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/scripts/CoverageStats.scala diff --git a/public/bammetrics/src/test/resources/log4j.properties b/bammetrics/src/test/resources/log4j.properties similarity index 100% rename from public/bammetrics/src/test/resources/log4j.properties rename to bammetrics/src/test/resources/log4j.properties diff --git a/protected/biopet-gatk-pipelines/src/test/resources/ref.dict b/bammetrics/src/test/resources/ref.dict similarity index 100% rename from protected/biopet-gatk-pipelines/src/test/resources/ref.dict rename to bammetrics/src/test/resources/ref.dict diff --git a/protected/biopet-gatk-pipelines/src/test/resources/ref.fa b/bammetrics/src/test/resources/ref.fa similarity index 100% rename from protected/biopet-gatk-pipelines/src/test/resources/ref.fa rename to bammetrics/src/test/resources/ref.fa diff --git a/protected/biopet-gatk-pipelines/src/test/resources/ref.fa.fai b/bammetrics/src/test/resources/ref.fa.fai similarity index 100% rename from protected/biopet-gatk-pipelines/src/test/resources/ref.fa.fai rename to 
bammetrics/src/test/resources/ref.fa.fai diff --git a/public/bammetrics/src/test/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetricsTest.scala b/bammetrics/src/test/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetricsTest.scala similarity index 100% rename from public/bammetrics/src/test/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetricsTest.scala rename to bammetrics/src/test/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetricsTest.scala diff --git a/protected/biopet-protected-package/.gitignore b/basty/.gitignore similarity index 100% rename from protected/biopet-protected-package/.gitignore rename to basty/.gitignore diff --git a/public/basty/pom.xml b/basty/pom.xml similarity index 100% rename from public/basty/pom.xml rename to basty/pom.xml diff --git a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala b/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala similarity index 94% rename from public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala rename to basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala index 3202d4f37db17d1bb9b824c89e948e44d82fb829..3c03ebe2cd0839f9955623ce3f03da29e290b113 100644 --- a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala +++ b/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala @@ -22,26 +22,29 @@ package nl.lumc.sasc.biopet.pipelines.basty import java.io.File -import nl.lumc.sasc.biopet.core.MultiSampleQScript +import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand } import nl.lumc.sasc.biopet.extensions.{ Cat, Raxml, RunGubbins } -import nl.lumc.sasc.biopet.pipelines.shiva.{ Shiva, ShivaTrait } +import nl.lumc.sasc.biopet.pipelines.shiva.Shiva import nl.lumc.sasc.biopet.extensions.tools.BastyGenerateFasta import nl.lumc.sasc.biopet.utils.ConfigUtils +import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript -trait 
BastyTrait extends MultiSampleQScript { - qscript: QScript => +class Basty(val root: Configurable) extends QScript with MultiSampleQScript { + qscript => + + def this() = this(null) case class FastaOutput(variants: File, consensus: File, consensusVariants: File) - def variantcallers = List("freebayes") + def variantcallers = List("unifiedgenotyper") override def defaults = Map( "ploidy" -> 1, "variantcallers" -> variantcallers ) - lazy val shiva: ShivaTrait = new Shiva(qscript) + lazy val shiva = new Shiva(qscript) def summaryFile: File = new File(outputDir, "Basty.summary.json") @@ -194,3 +197,5 @@ trait BastyTrait extends MultiSampleQScript { FastaOutput(bastyGenerateFasta.outputVariants, bastyGenerateFasta.outputConsensus, bastyGenerateFasta.outputConsensusVariants) } } + +object Basty extends PipelineCommand diff --git a/public/basty/src/test/resources/log4j.properties b/basty/src/test/resources/log4j.properties similarity index 100% rename from public/basty/src/test/resources/log4j.properties rename to basty/src/test/resources/log4j.properties diff --git a/biopet-aggregate/pom.xml b/biopet-aggregate/pom.xml index ca3283801777ad853c1f690cdadd9557cc296602..6a6f99a731013c51a6366d6cb60bec0e4313f84c 100644 --- a/biopet-aggregate/pom.xml +++ b/biopet-aggregate/pom.xml @@ -11,35 +11,32 @@ <parent> <groupId>nl.lumc.sasc</groupId> <artifactId>Biopet</artifactId> - <version>0.6.0-SNAPSHOT</version> - <relativePath>../public</relativePath> + <version>0.7.0-SNAPSHOT</version> + <relativePath>../</relativePath> </parent> <modules> - <module>../public/biopet-core</module> - <module>../public/biopet-public-package</module> - <module>../public/bammetrics</module> - <module>../public/flexiprep</module> - <module>../public/gentrap</module> - <module>../public/mapping</module> - <module>../public/sage</module> - <module>../public/kopisu</module> - <module>../public/gears</module> - <module>../public/bam2wig</module> - <module>../public/carp</module> - 
<module>../public/toucan</module> - <module>../public/gwas-test</module> - <module>../public/shiva</module> - <module>../public/basty</module> - <module>../public/tinycap</module> - <module>../public/biopet-utils</module> - <module>../public/biopet-tools</module> - <module>../public/biopet-tools-extensions</module> - <module>../public/biopet-extensions</module> - <module>../public/biopet-tools-package</module> - <module>../protected/biopet-gatk-extensions</module> - <module>../protected/biopet-gatk-pipelines</module> - <module>../protected/biopet-protected-package</module> + <module>../biopet-core</module> + <module>../biopet-package</module> + <module>../bammetrics</module> + <module>../flexiprep</module> + <module>../gentrap</module> + <module>../mapping</module> + <module>../sage</module> + <module>../kopisu</module> + <module>../gears</module> + <module>../bam2wig</module> + <module>../carp</module> + <module>../toucan</module> + <module>../gwas-test</module> + <module>../shiva</module> + <module>../basty</module> + <module>../tinycap</module> + <module>../biopet-utils</module> + <module>../biopet-tools</module> + <module>../biopet-tools-extensions</module> + <module>../biopet-extensions</module> + <module>../biopet-tools-package</module> </modules> </project> \ No newline at end of file diff --git a/public/biopet-core/pom.xml b/biopet-core/pom.xml similarity index 100% rename from public/biopet-core/pom.xml rename to biopet-core/pom.xml diff --git a/public/biopet-core/src/main/resources/log4j.properties b/biopet-core/src/main/resources/log4j.properties similarity index 100% rename from public/biopet-core/src/main/resources/log4j.properties rename to biopet-core/src/main/resources/log4j.properties diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/License.txt b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/License.txt similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/License.txt rename to 
biopet-core/src/main/resources/nl/lumc/sasc/biopet/License.txt diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/executables.ssp b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/executables.ssp similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/executables.ssp rename to biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/executables.ssp diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/bootstrap-theme.min.css b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/bootstrap-theme.min.css similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/bootstrap-theme.min.css rename to biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/bootstrap-theme.min.css diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/bootstrap.min.css b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/bootstrap.min.css similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/bootstrap.min.css rename to biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/bootstrap.min.css diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/bootstrap_dashboard.css b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/bootstrap_dashboard.css similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/bootstrap_dashboard.css rename to biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/bootstrap_dashboard.css diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/sortable-theme-bootstrap.css 
b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/sortable-theme-bootstrap.css similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/sortable-theme-bootstrap.css rename to biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/sortable-theme-bootstrap.css diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/fonts/glyphicons-halflings-regular.ttf b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/fonts/glyphicons-halflings-regular.ttf similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/fonts/glyphicons-halflings-regular.ttf rename to biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/fonts/glyphicons-halflings-regular.ttf diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/fonts/glyphicons-halflings-regular.woff b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/fonts/glyphicons-halflings-regular.woff similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/fonts/glyphicons-halflings-regular.woff rename to biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/fonts/glyphicons-halflings-regular.woff diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/fonts/glyphicons-halflings-regular.woff2 b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/fonts/glyphicons-halflings-regular.woff2 similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/fonts/glyphicons-halflings-regular.woff2 rename to biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/fonts/glyphicons-halflings-regular.woff2 diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/bootstrap.min.js 
b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/bootstrap.min.js similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/bootstrap.min.js rename to biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/bootstrap.min.js diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/d3.v3.5.5.min.js b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/d3.v3.5.5.min.js similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/d3.v3.5.5.min.js rename to biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/d3.v3.5.5.min.js diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/jquery.min.js b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/jquery.min.js similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/jquery.min.js rename to biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/jquery.min.js diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/sortable.min.js b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/sortable.min.js similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/sortable.min.js rename to biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/sortable.min.js diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/krona.ssp b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/krona.ssp similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/krona.ssp rename to biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/krona.ssp diff --git 
a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/librariesList.ssp b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/librariesList.ssp similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/librariesList.ssp rename to biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/librariesList.ssp diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/main.ssp b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/main.ssp similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/main.ssp rename to biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/main.ssp diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/reference.ssp b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/reference.ssp similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/reference.ssp rename to biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/reference.ssp diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/samplesList.ssp b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/samplesList.ssp similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/samplesList.ssp rename to biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/samplesList.ssp diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/tools/plotHeatmap.R b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/tools/plotHeatmap.R similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/tools/plotHeatmap.R rename to biopet-core/src/main/resources/nl/lumc/sasc/biopet/tools/plotHeatmap.R diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/plotScatter.R 
b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/plotScatter.R similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/plotScatter.R rename to biopet-core/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/plotScatter.R diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/plotXY.R b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/plotXY.R similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/plotXY.R rename to biopet-core/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/plotXY.R diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/stackedBar.R b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/stackedBar.R similarity index 100% rename from public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/stackedBar.R rename to biopet-core/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/stackedBar.R diff --git a/public/biopet-core/src/main/resources/org/broadinstitute/gatk/queue/util/queueJobReport.R b/biopet-core/src/main/resources/org/broadinstitute/gatk/queue/util/queueJobReport.R similarity index 100% rename from public/biopet-core/src/main/resources/org/broadinstitute/gatk/queue/util/queueJobReport.R rename to biopet-core/src/main/resources/org/broadinstitute/gatk/queue/util/queueJobReport.R diff --git a/public/biopet-core/src/main/resources/picard/analysis/baseDistributionByCycle.R b/biopet-core/src/main/resources/picard/analysis/baseDistributionByCycle.R similarity index 100% rename from public/biopet-core/src/main/resources/picard/analysis/baseDistributionByCycle.R rename to biopet-core/src/main/resources/picard/analysis/baseDistributionByCycle.R diff --git a/public/biopet-core/src/main/resources/picard/analysis/gcBias.R b/biopet-core/src/main/resources/picard/analysis/gcBias.R similarity index 100% rename from 
public/biopet-core/src/main/resources/picard/analysis/gcBias.R rename to biopet-core/src/main/resources/picard/analysis/gcBias.R diff --git a/public/biopet-core/src/main/resources/picard/analysis/insertSizeHistogram.R b/biopet-core/src/main/resources/picard/analysis/insertSizeHistogram.R similarity index 100% rename from public/biopet-core/src/main/resources/picard/analysis/insertSizeHistogram.R rename to biopet-core/src/main/resources/picard/analysis/insertSizeHistogram.R diff --git a/public/biopet-core/src/main/resources/picard/analysis/meanQualityByCycle.R b/biopet-core/src/main/resources/picard/analysis/meanQualityByCycle.R similarity index 100% rename from public/biopet-core/src/main/resources/picard/analysis/meanQualityByCycle.R rename to biopet-core/src/main/resources/picard/analysis/meanQualityByCycle.R diff --git a/public/biopet-core/src/main/resources/picard/analysis/qualityScoreDistribution.R b/biopet-core/src/main/resources/picard/analysis/qualityScoreDistribution.R similarity index 100% rename from public/biopet-core/src/main/resources/picard/analysis/qualityScoreDistribution.R rename to biopet-core/src/main/resources/picard/analysis/qualityScoreDistribution.R diff --git a/public/biopet-core/src/main/resources/picard/analysis/rnaSeqCoverage.R b/biopet-core/src/main/resources/picard/analysis/rnaSeqCoverage.R similarity index 100% rename from public/biopet-core/src/main/resources/picard/analysis/rnaSeqCoverage.R rename to biopet-core/src/main/resources/picard/analysis/rnaSeqCoverage.R diff --git a/public/biopet-core/src/main/resources/picard/analysis/rrbsQc.R b/biopet-core/src/main/resources/picard/analysis/rrbsQc.R similarity index 100% rename from public/biopet-core/src/main/resources/picard/analysis/rrbsQc.R rename to biopet-core/src/main/resources/picard/analysis/rrbsQc.R diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala 
b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala similarity index 98% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala index f367abc4c5c864215277872818ff306fb4c93a14..d634350fb374a04ce38567c49ab1709c0bfa8c24 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala +++ b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala @@ -15,11 +15,11 @@ */ package nl.lumc.sasc.biopet.core -import java.io.{ PrintWriter, File, FileInputStream } +import java.io.{ File, FileInputStream, PrintWriter } import java.security.MessageDigest import nl.lumc.sasc.biopet.utils.Logging -import org.broadinstitute.gatk.utils.commandline.{ Output, Input } +import org.broadinstitute.gatk.utils.commandline.{ Gather, Input, Output } import org.broadinstitute.gatk.utils.runtime.ProcessSettings import org.ggf.drmaa.JobTemplate @@ -35,9 +35,6 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction => @Input(doc = "deps", required = false) var deps: List[File] = Nil - @Output - var outputFiles: List[File] = Nil - var executable: String = _ /** This is the default shell for drmaa jobs */ diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetFifoPipe.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetFifoPipe.scala similarity index 97% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetFifoPipe.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetFifoPipe.scala index 287064130a8a055b7457a2e583517c8e60a1b5df..16626532ce11401652fef73d9320b689a8dc138d 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetFifoPipe.scala +++ b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetFifoPipe.scala @@ -18,6 +18,7 @@ 
package nl.lumc.sasc.biopet.core import java.io.File import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.Output /** * Created by pjvan_thof on 9/29/15. @@ -46,6 +47,9 @@ class BiopetFifoPipe(val root: Configurable, ) yield outputFile } + @Output + private var outputFiles: List[File] = Nil + override def beforeGraph(): Unit = { val outputs: Map[BiopetCommandLineFunction, Seq[File]] = try { commands.map(x => x -> x.outputs).toMap diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala similarity index 100% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetPipe.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetPipe.scala similarity index 100% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetPipe.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetPipe.scala diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala similarity index 100% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/CommandLineResources.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/CommandLineResources.scala similarity index 100% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/CommandLineResources.scala rename to 
biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/CommandLineResources.scala diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala similarity index 97% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala index 785db5662f074ca7c7691d72e732ec5b0c760092..35bcca8efa8f907770564f63dc91cb2f85510474 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala +++ b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala @@ -205,11 +205,12 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript => logger.info(s"Starting script for ${samples.size} samples") var count = 0 if (onlySamples.isEmpty || samples.forall(x => onlySamples.contains(x._1))) { - samples.foreach { case (sampleId, sample) => - logger.info(s"Starting script sample '$sampleId'") - sample.addAndTrackJobs() - count += 1 - logger.info(s"Finish script for '$sampleId', samples done: $count / ${samples.size}") + samples.foreach { + case (sampleId, sample) => + logger.info(s"Starting script sample '$sampleId'") + sample.addAndTrackJobs() + count += 1 + logger.info(s"Finish script for '$sampleId', samples done: $count / ${samples.size}") } logger.info("Starting script for multisample jobs") addMultiSampleJobs() diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/PipelineCommand.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/PipelineCommand.scala similarity index 100% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/PipelineCommand.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/PipelineCommand.scala diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala 
b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala similarity index 100% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/SampleLibraryTag.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/SampleLibraryTag.scala similarity index 100% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/SampleLibraryTag.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/SampleLibraryTag.scala diff --git a/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/ScatterGatherableFunction.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/ScatterGatherableFunction.scala new file mode 100644 index 0000000000000000000000000000000000000000..79fd7fa8e9c953bce65d9efc95b9bd62573c86d6 --- /dev/null +++ b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/ScatterGatherableFunction.scala @@ -0,0 +1,9 @@ +package nl.lumc.sasc.biopet.core + +/** + * Created by pjvan_thof on 4/26/16. 
+ */ +trait ScatterGatherableFunction extends BiopetCommandLineFunction + with org.broadinstitute.gatk.queue.function.scattergather.ScatterGatherableFunction { + scatterCount = config("scattercount", freeVar = true, default = 1) +} diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/ToolCommandFunction.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/ToolCommandFunction.scala similarity index 100% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/ToolCommandFunction.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/ToolCommandFunction.scala diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Version.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Version.scala similarity index 100% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Version.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Version.scala diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/WriteDependencies.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/WriteDependencies.scala similarity index 100% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/WriteDependencies.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/WriteDependencies.scala diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/annotations/Annotations.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/annotations/Annotations.scala similarity index 100% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/annotations/Annotations.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/annotations/Annotations.scala diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/CheckChecksum.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/CheckChecksum.scala similarity index 100% rename from 
public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/CheckChecksum.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/CheckChecksum.scala diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/Md5sum.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/Md5sum.scala similarity index 100% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/Md5sum.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/Md5sum.scala diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/PythonCommandLineFunction.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/PythonCommandLineFunction.scala similarity index 100% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/PythonCommandLineFunction.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/PythonCommandLineFunction.scala diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/RscriptCommandLineFunction.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/RscriptCommandLineFunction.scala similarity index 100% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/RscriptCommandLineFunction.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/RscriptCommandLineFunction.scala diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/MultisampleReportBuilder.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/MultisampleReportBuilder.scala similarity index 100% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/MultisampleReportBuilder.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/MultisampleReportBuilder.scala diff --git 
a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportBuilder.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportBuilder.scala similarity index 99% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportBuilder.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportBuilder.scala index 41a974fcc7d7fdc4da13d0f8e47ae621c3b8ac13..43af927eb622b8831a1a0343b9eecb761de433d8 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportBuilder.scala +++ b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportBuilder.scala @@ -189,6 +189,7 @@ trait ReportBuilder extends ToolCommand { /** * This method will render the page and the subpages recursivly + * * @param summary The summary object * @param page Page to render * @param outputDir Root output dir of the report @@ -204,7 +205,7 @@ trait ReportBuilder extends ToolCommand { val pageOutputDir = new File(outputDir, path.mkString(File.separator)) pageOutputDir.mkdirs() - val rootPath = "./" + Array.fill(path.size)("../").mkString("") + val rootPath = "./" + Array.fill(path.size)("src/main").mkString("") val pageArgs = args ++ page.args ++ Map("page" -> page, "path" -> path, diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportPage.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportPage.scala similarity index 100% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportPage.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportPage.scala diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportSection.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportSection.scala similarity index 100% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportSection.scala rename to 
biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportSection.scala diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/Summarizable.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/Summarizable.scala similarity index 100% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/Summarizable.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/Summarizable.scala diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala similarity index 100% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala similarity index 100% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/workaround/BiopetQCommandLine.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/workaround/BiopetQCommandLine.scala similarity index 100% rename from public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/workaround/BiopetQCommandLine.scala rename to biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/workaround/BiopetQCommandLine.scala diff --git a/public/biopet-core/src/test/resources/empty_summary.json b/biopet-core/src/test/resources/empty_summary.json similarity index 100% rename from public/biopet-core/src/test/resources/empty_summary.json rename to biopet-core/src/test/resources/empty_summary.json diff --git 
a/public/biopet-core/src/test/resources/fake_chrQ.dict b/biopet-core/src/test/resources/fake_chrQ.dict similarity index 100% rename from public/biopet-core/src/test/resources/fake_chrQ.dict rename to biopet-core/src/test/resources/fake_chrQ.dict diff --git a/public/biopet-core/src/test/resources/fake_chrQ.fa b/biopet-core/src/test/resources/fake_chrQ.fa similarity index 100% rename from public/biopet-core/src/test/resources/fake_chrQ.fa rename to biopet-core/src/test/resources/fake_chrQ.fa diff --git a/public/biopet-core/src/test/resources/fake_chrQ.fa.fai b/biopet-core/src/test/resources/fake_chrQ.fa.fai similarity index 100% rename from public/biopet-core/src/test/resources/fake_chrQ.fa.fai rename to biopet-core/src/test/resources/fake_chrQ.fa.fai diff --git a/public/biopet-core/src/test/resources/fake_chrQ_no_index.fa b/biopet-core/src/test/resources/fake_chrQ_no_index.fa similarity index 100% rename from public/biopet-core/src/test/resources/fake_chrQ_no_index.fa rename to biopet-core/src/test/resources/fake_chrQ_no_index.fa diff --git a/public/biopet-core/src/test/resources/log4j.properties b/biopet-core/src/test/resources/log4j.properties similarity index 100% rename from public/biopet-core/src/test/resources/log4j.properties rename to biopet-core/src/test/resources/log4j.properties diff --git a/public/biopet-core/src/test/resources/template.ssp b/biopet-core/src/test/resources/template.ssp similarity index 100% rename from public/biopet-core/src/test/resources/template.ssp rename to biopet-core/src/test/resources/template.ssp diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/BiopetPipeTest.scala b/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/BiopetPipeTest.scala similarity index 100% rename from public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/BiopetPipeTest.scala rename to biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/BiopetPipeTest.scala diff --git 
a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/CommandLineResourcesTest.scala b/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/CommandLineResourcesTest.scala similarity index 100% rename from public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/CommandLineResourcesTest.scala rename to biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/CommandLineResourcesTest.scala diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/MultiSampleQScriptTest.scala b/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/MultiSampleQScriptTest.scala similarity index 100% rename from public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/MultiSampleQScriptTest.scala rename to biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/MultiSampleQScriptTest.scala diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/PipelineCommandTest.scala b/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/PipelineCommandTest.scala similarity index 100% rename from public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/PipelineCommandTest.scala rename to biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/PipelineCommandTest.scala diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/ReferenceTest.scala b/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/ReferenceTest.scala similarity index 100% rename from public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/ReferenceTest.scala rename to biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/ReferenceTest.scala diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/SampleLibraryTagTest.scala b/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/SampleLibraryTagTest.scala similarity index 100% rename from public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/SampleLibraryTagTest.scala rename to biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/SampleLibraryTagTest.scala diff --git 
a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/ToolCommandTest.scala b/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/ToolCommandTest.scala similarity index 100% rename from public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/ToolCommandTest.scala rename to biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/ToolCommandTest.scala diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/report/MultisampleReportBuilderTest.scala b/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/report/MultisampleReportBuilderTest.scala similarity index 100% rename from public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/report/MultisampleReportBuilderTest.scala rename to biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/report/MultisampleReportBuilderTest.scala diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/report/ReportBuilderTest.scala b/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/report/ReportBuilderTest.scala similarity index 100% rename from public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/report/ReportBuilderTest.scala rename to biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/report/ReportBuilderTest.scala diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/report/ReportSectionTest.scala b/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/report/ReportSectionTest.scala similarity index 100% rename from public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/report/ReportSectionTest.scala rename to biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/report/ReportSectionTest.scala diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/SummarizableTest.scala b/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/SummarizableTest.scala similarity index 100% rename from public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/SummarizableTest.scala rename to 
biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/SummarizableTest.scala diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScriptTest.scala b/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScriptTest.scala similarity index 100% rename from public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScriptTest.scala rename to biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScriptTest.scala diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/WriteSummaryTest.scala b/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/WriteSummaryTest.scala similarity index 100% rename from public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/WriteSummaryTest.scala rename to biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/WriteSummaryTest.scala diff --git a/public/biopet-extensions/pom.xml b/biopet-extensions/pom.xml similarity index 100% rename from public/biopet-extensions/pom.xml rename to biopet-extensions/pom.xml diff --git a/public/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/breakdancer/breakdancer2vcf.py b/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/breakdancer/breakdancer2vcf.py similarity index 100% rename from public/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/breakdancer/breakdancer2vcf.py rename to biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/breakdancer/breakdancer2vcf.py diff --git a/public/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/samtools/fix_iupac_mpileup.py b/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/samtools/fix_iupac_mpileup.py similarity index 100% rename from public/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/samtools/fix_iupac_mpileup.py rename to 
biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/samtools/fix_iupac_mpileup.py diff --git a/public/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/varscan/fix_mpileup.py b/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/varscan/fix_mpileup.py similarity index 100% rename from public/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/varscan/fix_mpileup.py rename to biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/varscan/fix_mpileup.py diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Bgzip.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Bgzip.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Bgzip.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Bgzip.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cat.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cat.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cat.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cat.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cufflinks.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cufflinks.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cufflinks.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cufflinks.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cuffquant.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cuffquant.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cuffquant.scala rename to 
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cuffquant.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Curl.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Curl.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Curl.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Curl.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala similarity index 78% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala index 60c25a5a69820b72a7e5bbd0f17cc8b5f0dac3fe..fb99be4a4f8716c8f96fb6be12d9caa264b249be 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala @@ -24,6 +24,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import scala.collection.mutable import scala.io.Source +import scala.util.matching.Regex /** * Extension for cutadapt @@ -163,6 +164,51 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su (if (outputAsStsout) "" else required("--output", fastqOutput) + " > " + required(statsOutput)) + def extractClippedAdapters(statsOutput: File): Map[String, Any] = { + val histoCountRow: Regex = """([\d]+)\t([\d]+)\t.*""".r + val adapterR = """Sequence: ([C|T|A|G]+);.*Trimmed: ([\d]+) times\.""".r + + val statsFile = Source.fromFile(statsOutput) + val adapterRawStats: Array[String] = statsFile.mkString + .split("=== Adapter [\\d]+ ===") + .filter(_.contains("Sequence") + ) + statsFile.close() + + adapterRawStats.map(adapter => { + var adapterName = "" + var adapterCount = 0 
+ // identify the adapter name and count + for (line <- adapter.split("\n")) { + line match { + case adapterR(adapter, count) => { + adapterName = adapter + adapterCount = count.toInt + } + case _ => + } + } + + // parse the block that gives the histogram of clipped bases and from which end + val counts = adapter.split("Overview of removed sequences ") + .filter(x => x.contains("length")) + .map(clipSideRawStats => { + val clipSideLabel = if (clipSideRawStats.contains("5'")) { "5p" } else { "3p" } + + val histogramValues = clipSideRawStats.split("\n").flatMap({ + case histoCountRow(length, count) => Some(length.toInt -> count.toInt) + case _ => None + }) + clipSideLabel -> histogramValues.toMap + }) + + adapterName -> Map( + "count" -> adapterCount, + "histogram" -> counts.toMap + ) + }).toMap // converting the Array[String] containing map-items to Map with 'toMap' + } + /** Output summary stats */ def summaryStats: Map[String, Any] = { /** @@ -177,7 +223,6 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su val tooLongR = """.* that were too long: *([,\d]+) .*""".r val tooManyN = """.* with too many N: *([,\d]+) .*""".r - val adapterR = """Sequence ([C|T|A|G]*);.*Trimmed: ([,\d]+) times.""".r val basePairsProcessed = """Total basepairs processed: *([,\d]+) bp""".r val basePairsWritten = """Total written \(filtered\): *([,\d]+) bp .*""".r @@ -192,24 +237,28 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su "bpoutput" -> 0, "toomanyn" -> 0 ) - val adapterStats: mutable.Map[String, Long] = mutable.Map() + + // extract the adapters with its histogram + val adapterStats = if (statsOutput.exists) { + extractClippedAdapters(statsOutput) + } else Map.empty if (statsOutput.exists) { val statsFile = Source.fromFile(statsOutput) for (line <- statsFile.getLines()) { line match { - case processedReads(m) => stats("processed") = m.replaceAll(",", "").toLong - case withAdapters(m) => stats("withadapters") = 
m.replaceAll(",", "").toLong - case readsPassingFilters(m) => stats("passingfilters") = m.replaceAll(",", "").toLong - case tooShortR(m) => stats("tooshort") = m.replaceAll(",", "").toLong - case tooLongR(m) => stats("toolong") = m.replaceAll(",", "").toLong - case tooManyN(m) => stats("toomanyn") = m.replaceAll(",", "").toLong - case basePairsProcessed(m) => stats("bpinput") = m.replaceAll(",", "").toLong - case basePairsWritten(m) => stats("bpoutput") = m.replaceAll(",", "").toLong - case adapterR(adapter, count) => adapterStats += (adapter -> count.toLong) - case _ => + case processedReads(m) => stats("processed") = m.replaceAll(",", "").toLong + case withAdapters(m) => stats("withadapters") = m.replaceAll(",", "").toLong + case readsPassingFilters(m) => stats("passingfilters") = m.replaceAll(",", "").toLong + case tooShortR(m) => stats("tooshort") = m.replaceAll(",", "").toLong + case tooLongR(m) => stats("toolong") = m.replaceAll(",", "").toLong + case tooManyN(m) => stats("toomanyn") = m.replaceAll(",", "").toLong + case basePairsProcessed(m) => stats("bpinput") = m.replaceAll(",", "").toLong + case basePairsWritten(m) => stats("bpoutput") = m.replaceAll(",", "").toLong + case _ => } } + statsFile.close() } val cleanReads = stats("processed") - stats("withadapters") @@ -223,8 +272,8 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su "num_reads_discarded_too_long" -> stats("toolong"), "num_reads_discarded_many_n" -> stats("toomanyn"), "num_bases_input" -> stats("bpinput"), - "num_based_output" -> stats("bpoutput"), - adaptersStatsName -> adapterStats.toMap + "num_bases_output" -> stats("bpoutput"), + adaptersStatsName -> adapterStats ) } diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala 
rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Flash.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Flash.scala similarity index 88% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Flash.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Flash.scala index 7e4ef448859682383759a46083e66e12515106e1..dd2f35d0bc66846d656cc33f75980757c74ab85d 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Flash.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Flash.scala @@ -17,9 +17,9 @@ package nl.lumc.sasc.biopet.extensions import java.io.File -import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction } +import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Version } import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.Input +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import scala.util.matching.Regex @@ -64,16 +64,33 @@ class Flash(val root: Configurable) extends BiopetCommandLineFunction with Versi private def suffix = outputSuffix.getOrElse("fastq") + (if (compress) ".gz" else "") + @Output + private var _combinedFastq: File = _ def combinedFastq = new File(outputDirectory, s"$outputPrefix.extendedFrags.$suffix") + + @Output + private var _notCombinedR1: File = _ def notCombinedR1 = new File(outputDirectory, s"$outputPrefix.notCombined_1.$suffix") + + @Output + private var _notCombinedR2: File = _ def notCombinedR2 = new File(outputDirectory, s"$outputPrefix.notCombined_2.$suffix") + + @Output + private var _outputHistogramTable: File = _ def outputHistogramTable = new File(outputDirectory, s"$outputPrefix.hist") + + @Output + private var _outputHistogram: File = _ def outputHistogram = new File(outputDirectory, 
s"$outputPrefix.histogram") override def beforeGraph(): Unit = { super.beforeGraph() - outputFiles :::= combinedFastq :: notCombinedR1 :: - notCombinedR2 :: outputHistogramTable :: outputHistogram :: Nil + _combinedFastq = combinedFastq + _notCombinedR1 = notCombinedR1 + _notCombinedR2 = notCombinedR2 + _outputHistogramTable = outputHistogramTable + _outputHistogram = outputHistogram } def cmdLine = executable + diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Freebayes.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Freebayes.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Freebayes.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Freebayes.scala diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Grep.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Grep.scala new file mode 100644 index 0000000000000000000000000000000000000000..2f7d65916c69340336474370cad52a526cd7f465 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Grep.scala @@ -0,0 +1,50 @@ +package nl.lumc.sasc.biopet.extensions + +import java.io.File + +import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } + +/** + * Created by pjvanthof on 30/03/16. 
+ */ +class Grep(val root: Configurable) extends BiopetCommandLineFunction { + @Input(doc = "Input file", required = true) + var input: File = _ + + @Output(doc = "Output file", required = true) + var output: File = _ + + executable = config("exe", default = "grep") + + var grepFor: String = null + + var invertMatch: Boolean = false + var regex: Boolean = false + var perlRegexp: Boolean = false + + /** return commandline to execute */ + def cmdLine = required(executable) + + conditional(invertMatch, "-v") + + conditional(regex, "-e") + + conditional(perlRegexp, "-P") + + required(grepFor) + + (if (inputAsStdin) "" else required(input)) + + (if (outputAsStsout) "" else " > " + required(output)) +} + +object Grep { + def apply(root: Configurable, + grepFor: String, + regex: Boolean = false, + invertMatch: Boolean = false, + perlRegexp: Boolean = false): Grep = { + val grep = new Grep(root) + grep.grepFor = grepFor + grep.regex = regex + grep.perlRegexp = perlRegexp + grep.invertMatch = invertMatch + grep + } +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Gzip.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Gzip.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Gzip.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Gzip.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/HtseqCount.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/HtseqCount.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/HtseqCount.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/HtseqCount.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Ln.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Ln.scala similarity index 100% rename from 
public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Ln.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Ln.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Pbzip2.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Pbzip2.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Pbzip2.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Pbzip2.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Pysvtools.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Pysvtools.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Pysvtools.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Pysvtools.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/RunGubbins.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/RunGubbins.scala similarity index 98% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/RunGubbins.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/RunGubbins.scala index 5334ab9cfe64a98f2d4c81745404c11006884044..896bb597f8d47b87bb01ad15eba7a2a80d2a9bc6 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/RunGubbins.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/RunGubbins.scala @@ -49,6 
+49,9 @@ class RunGubbins(val root: Configurable) extends BiopetCommandLineFunction { var verbose: Boolean = config("verbose", default = false) var noCleanup: Boolean = config("no_cleanup", default = false) + @Output + var outputFiles: List[File] = Nil + /** Set correct output files */ override def beforeGraph(): Unit = { super.beforeGraph() diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Sha1sum.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Sha1sum.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Sha1sum.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Sha1sum.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Snptest.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Snptest.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Snptest.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Snptest.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Stampy.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Stampy.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Stampy.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Stampy.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala 
b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala similarity index 98% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala index eb39fa864a481f094636474a7ec61396f5c6dcf8..8c0fbc9603db0bf5a5f2c04047f850a9fb730b0a 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala @@ -65,6 +65,9 @@ class Tabix(val root: Configurable) extends BiopetCommandLineFunction with Versi /** Formats that tabix can handle */ private val validFormats: Set[String] = Set("gff", "bed", "sam", "vcf", "psltbl") + @Output + var outputFiles: List[File] = Nil + override def beforeGraph(): Unit = { super.beforeGraph() p match { diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/TarExtract.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/TarExtract.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/TarExtract.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/TarExtract.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tophat.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tophat.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tophat.scala rename to 
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tophat.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala similarity index 97% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala index bc8ae1e7a3072db7afeaa346425c38bb83a8e633..20b4ae8422936bfc5a8b3b8310074b01b46445a6 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala @@ -150,14 +150,17 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu override def defaultCoreMemory = 4.0 + @Output + private var _summary: File = null + override def beforeGraph(): Unit = { super.beforeGraph() if (!cache && !database) { - Logging.addError("Must supply either cache or database for VariantEffectPredictor") + Logging.addError("Must either set 'cache' or 'database' to true for VariantEffectPredictor") } else if (cache && dir.isEmpty) { - Logging.addError("Must supply dir to cache for VariantEffectPredictor") + Logging.addError("Must supply 'dir_cache' to cache for VariantEffectPredictor") } - if (statsText) outputFiles :+= new File(output.getAbsolutePath + "_summary.txt") + if (statsText) _summary = new File(output.getAbsolutePath + "_summary.txt") } /** Returns command to execute */ diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/WigToBigWig.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/WigToBigWig.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/WigToBigWig.scala rename to 
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/WigToBigWig.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Zcat.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Zcat.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Zcat.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Zcat.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/Bcftools.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/Bcftools.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/Bcftools.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/Bcftools.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsCall.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsCall.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsCall.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsCall.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsMerge.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsMerge.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsMerge.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsMerge.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsView.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsView.scala similarity index 100% 
rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsView.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsView.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/Bedtools.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/Bedtools.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/Bedtools.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/Bedtools.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsCoverage.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsCoverage.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsCoverage.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsCoverage.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsGroupby.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsGroupby.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsGroupby.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsGroupby.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsIntersect.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsIntersect.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsIntersect.scala rename to 
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsIntersect.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsMerge.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsMerge.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsMerge.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsMerge.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie2.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie2.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie2.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie2.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie2Build.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie2Build.scala similarity index 92% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie2Build.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie2Build.scala index 8d2b9ecaea5c8caef2182f52472de1a1a8e936da..ef553453f2a16c4abca32b29af5f963761c8b770 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie2Build.scala +++ 
b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie2Build.scala @@ -17,9 +17,9 @@ package nl.lumc.sasc.biopet.extensions.bowtie import java.io.File -import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction } +import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Version } import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Argument, Input } +import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output } /** * Created by pjvan_thof on 8/15/15. @@ -37,6 +37,9 @@ class Bowtie2Build(val root: Configurable) extends BiopetCommandLineFunction wit override def defaultCoreMemory = 15.0 + @Output + private var outputFiles: List[File] = Nil + override def beforeGraph: Unit = { outputFiles ::= new File(reference.getParentFile, baseName + ".1.bt2") outputFiles ::= new File(reference.getParentFile, baseName + ".2.bt2") diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/BowtieBuild.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/BowtieBuild.scala similarity index 92% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/BowtieBuild.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/BowtieBuild.scala index 6e589c589800ccfcf821080eee85f1c917c0eee0..9faeef91b8a9745e315c332a9d0eb7c364dad677 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/BowtieBuild.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/BowtieBuild.scala @@ -17,9 +17,9 @@ package nl.lumc.sasc.biopet.extensions.bowtie import java.io.File -import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction } +import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Version } import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Argument, 
Input } +import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output } /** * Created by pjvan_thof on 8/15/15. @@ -37,6 +37,9 @@ class BowtieBuild(val root: Configurable) extends BiopetCommandLineFunction with override def defaultCoreMemory = 15.0 + @Output + private var outputFiles: List[File] = Nil + override def beforeGraph: Unit = { outputFiles ::= new File(reference.getParentFile, baseName + ".1.ebwt") outputFiles ::= new File(reference.getParentFile, baseName + ".2.ebwt") diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/breakdancer/BreakdancerCaller.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/breakdancer/BreakdancerCaller.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/breakdancer/BreakdancerCaller.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/breakdancer/BreakdancerCaller.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/breakdancer/BreakdancerConfig.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/breakdancer/BreakdancerConfig.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/breakdancer/BreakdancerConfig.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/breakdancer/BreakdancerConfig.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/breakdancer/BreakdancerVCF.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/breakdancer/BreakdancerVCF.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/breakdancer/BreakdancerVCF.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/breakdancer/BreakdancerVCF.scala diff --git 
a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/Bwa.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/Bwa.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/Bwa.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/Bwa.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaAln.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaAln.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaAln.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaAln.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaIndex.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaIndex.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaIndex.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaIndex.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaMem.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaMem.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaMem.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaMem.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSampe.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSampe.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSampe.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSampe.scala diff --git 
a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSamse.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSamse.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSamse.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSamse.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/clever/CleverCaller.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/clever/CleverCaller.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/clever/CleverCaller.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/clever/CleverCaller.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/Conifer.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/Conifer.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/Conifer.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/Conifer.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/ConiferAnalyze.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/ConiferAnalyze.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/ConiferAnalyze.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/ConiferAnalyze.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/ConiferCall.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/ConiferCall.scala similarity index 100% rename from 
public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/ConiferCall.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/ConiferCall.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/ConiferExport.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/ConiferExport.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/ConiferExport.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/ConiferExport.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/ConiferRPKM.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/ConiferRPKM.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/ConiferRPKM.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/ConiferRPKM.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/delly/DellyCaller.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/delly/DellyCaller.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/delly/DellyCaller.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/delly/DellyCaller.scala diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/AnalyzeCovariates.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/AnalyzeCovariates.scala new file mode 100644 index 0000000000000000000000000000000000000000..c0740c64008c0d887a879e445c08e41dc1a96cd6 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/AnalyzeCovariates.scala @@ -0,0 +1,67 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import 
java.io.File + +import nl.lumc.sasc.biopet.core.ScatterGatherableFunction +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ } + +class AnalyzeCovariates(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction { + def analysis_type = "AnalyzeCovariates" + scatterClass = classOf[LocusScatterFunction] + setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false } + + /** file containing the BQSR first-pass report file */ + @Input(fullName = "beforeReportFile", shortName = "before", doc = "file containing the BQSR first-pass report file", required = false, exclusiveOf = "", validation = "") + var beforeReportFile: File = _ + + /** file containing the BQSR second-pass report file */ + @Input(fullName = "afterReportFile", shortName = "after", doc = "file containing the BQSR second-pass report file", required = false, exclusiveOf = "", validation = "") + var afterReportFile: File = _ + + /** do not emit warning messages related to suspicious last modification time order of inputs */ + @Argument(fullName = "ignoreLastModificationTimes", shortName = "ignoreLMT", doc = "do not emit warning messages related to suspicious last modification time order of inputs", required = false, exclusiveOf = "", validation = "") + var ignoreLastModificationTimes: Boolean = config("ignoreLastModificationTimes", default = false) + + /** location of the output report */ + @Output(fullName = "plotsReportFile", shortName = "plots", doc = "location of the output report", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) + var plotsReportFile: File = _ + + /** location of the csv intermediate file */ + @Output(fullName = "intermediateCsvFile", shortName = "csv", doc = "location of the csv intermediate file", required = false, exclusiveOf = "", validation = "") 
+ @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) + var intermediateCsvFile: File = _ + + /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ + @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) + + /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ + @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false) + + /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ + @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. 
'*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) + + override def cmdLine = super.cmdLine + + optional("-before", beforeReportFile, spaceSeparated = true, escape = true, format = "%s") + + optional("-after", afterReportFile, spaceSeparated = true, escape = true, format = "%s") + + conditional(ignoreLastModificationTimes, "-ignoreLMT", escape = true, format = "%s") + + optional("-plots", plotsReportFile, spaceSeparated = true, escape = true, format = "%s") + + optional("-csv", intermediateCsvFile, spaceSeparated = true, escape = true, format = "%s") + + conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + + conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s") +} + +object AnalyzeCovariates { + def apply(root: Configurable, before: File, after: File, plots: File): AnalyzeCovariates = { + val ac = new AnalyzeCovariates(root) + ac.beforeReportFile = before + ac.afterReportFile = after + ac.plotsReportFile = plots + ac + } +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala new file mode 100644 index 0000000000000000000000000000000000000000..b3be8d8578a2a0563274bfc88fa5cf8eb14df543 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala @@ -0,0 +1,104 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import java.io.File + +import nl.lumc.sasc.biopet.core.ScatterGatherableFunction +import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile 
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, Output } + +/** Queue command-line function for the GATK walker selected by analysis_type ("ApplyRecalibration"); the VCF written to `out` is gathered with CatVariantsGatherer. */ class ApplyRecalibration(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction { + def analysis_type = "ApplyRecalibration" + /* Scatter setup: LocusScatterFunction is used, and includeUnmapped is switched off on GATK scatter functions. */ scatterClass = classOf[LocusScatterFunction] + setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false } + + /** The raw input variants to be recalibrated */ + @Input(fullName = "input", shortName = "input", doc = "The raw input variants to be recalibrated", required = true, exclusiveOf = "", validation = "") + var input: Seq[File] = Nil + + /** The input recal file used by ApplyRecalibration */ + @Input(fullName = "recal_file", shortName = "recalFile", doc = "The input recal file used by ApplyRecalibration", required = true, exclusiveOf = "", validation = "") + var recal_file: File = _ + + /** The input tranches file describing where to cut the data */ + @Input(fullName = "tranches_file", shortName = "tranchesFile", doc = "The input tranches file describing where to cut the data", required = false, exclusiveOf = "", validation = "") + var tranches_file: File = _ + + /** The output filtered and recalibrated VCF file in which each variant is annotated with its VQSLOD value */ + @Output(fullName = "out", shortName = "o", doc = "The output filtered and recalibrated VCF file in which each variant is annotated with its VQSLOD value", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[CatVariantsGatherer]) + var out: File = _ + + /** The truth sensitivity level at which to start filtering */ + @Argument(fullName = "ts_filter_level", shortName = "ts_filter_level", doc = "The truth sensitivity level at which to start filtering", required = false, exclusiveOf = "", validation = "") + var ts_filter_level: Option[Double] = config("ts_filter_level") + + /** Format string for ts_filter_level */ + @Argument(fullName = "ts_filter_levelFormat", shortName = "", doc = "Format string for 
ts_filter_level", required = false, exclusiveOf = "", validation = "") + var ts_filter_levelFormat: String = "%s" + + /** The VQSLOD score below which to start filtering */ + @Argument(fullName = "lodCutoff", shortName = "lodCutoff", doc = "The VQSLOD score below which to start filtering", required = false, exclusiveOf = "", validation = "") + var lodCutoff: Option[Double] = config("lodCutoff") + + /** Format string for lodCutoff */ + @Argument(fullName = "lodCutoffFormat", shortName = "", doc = "Format string for lodCutoff", required = false, exclusiveOf = "", validation = "") + var lodCutoffFormat: String = "%s" + + /** If specified, the recalibration will be applied to variants marked as filtered by the specified filter name in the input VCF file */ + @Argument(fullName = "ignore_filter", shortName = "ignoreFilter", doc = "If specified, the recalibration will be applied to variants marked as filtered by the specified filter name in the input VCF file", required = false, exclusiveOf = "", validation = "") + var ignore_filter: List[String] = config("ignore_filter", default = Nil) + + /** If specified, the variant recalibrator will ignore all input filters. Useful to rerun the VQSR from a filtered output file. */ + @Argument(fullName = "ignore_all_filters", shortName = "ignoreAllFilters", doc = "If specified, the variant recalibrator will ignore all input filters. Useful to rerun the VQSR from a filtered output file.", required = false, exclusiveOf = "", validation = "") + var ignore_all_filters: Boolean = config("ignore_all_filters", default = false) + + /** Don't output filtered loci after applying the recalibration */ + @Argument(fullName = "excludeFiltered", shortName = "ef", doc = "Don't output filtered loci after applying the recalibration", required = false, exclusiveOf = "", validation = "") + var excludeFiltered: Boolean = config("excludeFiltered", default = false) + + /** Recalibration mode to employ: 1.) 
SNP for recalibrating only SNPs (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both SNPs and indels simultaneously. */ + @Argument(fullName = "mode", shortName = "mode", doc = "Recalibration mode to employ: 1.) SNP for recalibrating only SNPs (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both SNPs and indels simultaneously.", required = false, exclusiveOf = "", validation = "") + /* Left unset here: unlike most arguments of this class, mode is not read from config. */ var mode: String = _ + + /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ + @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) + + /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ + @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false) + + /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ + @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. 
'*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) + + /** Index file belonging to `out`; assigned in beforeGraph and excluded from gathering. */ @Output + @Gather(enabled = false) + private var outputIndex: File = _ + + /** Adds the VCF index of every non-null input not ending in ".list", and of recal_file when set, to deps; derives outputIndex from `out` unless `out` is a special file; sets num_threads from getThreads. */ override def beforeGraph() { + super.beforeGraph() + deps ++= input.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => VcfUtils.getVcfIndexFile(orig)) + if (recal_file != null) + deps :+= VcfUtils.getVcfIndexFile(recal_file) + if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out)) + outputIndex = VcfUtils.getVcfIndexFile(out) + num_threads = Option(getThreads) + } + + /** Appends the arguments declared in this class to the parent's command line; flags use the annotations' short names. */ override def cmdLine = super.cmdLine + + repeat("-input", input, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + + required(TaggedFile.formatCommandLineParameter("-recalFile", recal_file), recal_file, spaceSeparated = true, escape = true, format = "%s") + + optional("-tranchesFile", tranches_file, spaceSeparated = true, escape = true, format = "%s") + + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + + optional("-ts_filter_level", ts_filter_level, spaceSeparated = true, escape = true, format = ts_filter_levelFormat) + + optional("-lodCutoff", lodCutoff, spaceSeparated = true, escape = true, format = lodCutoffFormat) + + repeat("-ignoreFilter", ignore_filter, spaceSeparated = true, escape = true, format = "%s") + conditional(ignore_all_filters, "-ignoreAllFilters", escape = true, format = "%s") + + conditional(excludeFiltered, "-ef", escape = true, format = "%s") + + optional("-mode", mode, spaceSeparated = true, escape = true, format = "%s") + + conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + + conditional(filter_bases_not_stored, 
"-filterNoBases", escape = true, format = "%s") +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BamGatherFunction.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BamGatherFunction.scala new file mode 100644 index 0000000000000000000000000000000000000000..c7a55537e2750e1cb316dfe51d63f9874faa6a72 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BamGatherFunction.scala @@ -0,0 +1,33 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction +import nl.lumc.sasc.biopet.extensions.picard.MergeSamFiles +import org.broadinstitute.gatk.queue.function.scattergather.GatherFunction + +/** + * Gather function that merges scattered BAM parts into the original output; built on the biopet picard MergeSamFiles extension. + */ +class BamGatherFunction extends MergeSamFiles(null) with GatherFunction { + + /* NOTE(review): a val is evaluated at construction time; confirm originalFunction is already set then, otherwise root is always null. */ override val root = originalFunction match { + case b: BiopetCommandLineFunction => b + case _ => null + } + + this.assumeSorted = true + + /** Uses the gather parts as input and the original output as output, forces coordinate sort order, and creates an index unless the original GATK function disabled BAM indexing. */ override def freezeFieldValues() { + this.input = this.gatherParts.toList + this.output = this.originalOutput + this.sortOrder = "coordinate" + //Left to its own devices (ie, MergeSamFiles.freezeFieldValues), outputIndex + //will be in the gather directory. Ensure that it actually matches this.output (NOTE(review): no visible statement here adjusts outputIndex - confirm) + + val originalGATK = originalFunction.asInstanceOf[CommandLineGATK] + + // Whatever the original function can handle, merging *should* do less. 
+ this.createIndex = !originalGATK.disable_bam_indexing + + super.freezeFieldValues() + } +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala new file mode 100644 index 0000000000000000000000000000000000000000..7e5bbfd3ff4645d11bd2ee173005c713bf0f2458 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala @@ -0,0 +1,184 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import java.io.File + +import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile +import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ } + +/** Queue command-line function for the GATK walker selected by analysis_type ("BaseRecalibrator"). Scatter/gather support is commented out for now. */ //TODO: check gathering +class BaseRecalibrator(val root: Configurable) extends CommandLineGATK /* with ScatterGatherableFunction */ { + def analysis_type = "BaseRecalibrator" + //TODO: check gathering + //scatterClass = classOf[ContigScatterFunction] + //setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false } + + /** A database of known polymorphic sites */ + @Input(fullName = "knownSites", shortName = "knownSites", doc = "A database of known polymorphic sites", required = false, exclusiveOf = "", validation = "") + var knownSites: List[File] = config("known_sites", default = Nil) + + /** Dependencies on any indexes of knownSites */ + @Input(fullName = "knownSitesIndexes", shortName = "", doc = "Dependencies on any indexes of knownSites", required = false, exclusiveOf = "", validation = "") + private var knownSitesIndexes: Seq[File] = Nil + + /** The output recalibration table file to create */ + @Output(fullName = "out", shortName = "o", doc = "The output recalibration table file to create", required = true, exclusiveOf = "", validation = "") //TODO: check gathering + 
//@Gather(classOf[org.broadinstitute.gatk.engine.recalibration.BQSRGatherer]) + var out: File = _ + + /** One or more covariates to be used in the recalibration. Can be specified multiple times */ + @Argument(fullName = "covariate", shortName = "cov", doc = "One or more covariates to be used in the recalibration. Can be specified multiple times", required = false, exclusiveOf = "", validation = "") + var covariate: List[String] = config("covariate", default = Nil) + + /** Do not use the standard set of covariates, but rather just the ones listed using the -cov argument */ + @Argument(fullName = "no_standard_covs", shortName = "noStandard", doc = "Do not use the standard set of covariates, but rather just the ones listed using the -cov argument", required = false, exclusiveOf = "", validation = "") + var no_standard_covs: Boolean = config("no_standard_covs", default = false) + + /** If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only. */ + @Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.", required = false, exclusiveOf = "", validation = "") + var run_without_dbsnp_potentially_ruining_quality: Boolean = config("run_without_dbsnp_potentially_ruining_quality", default = false) + + /** How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS */ + @Argument(fullName = "solid_recal_mode", shortName = "sMode", doc = "How should we recalibrate solid bases in which the reference was inserted? 
Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS", required = false, exclusiveOf = "", validation = "") + var solid_recal_mode: Option[String] = config("solid_recal_mode") + + /** Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ */ + @Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false, exclusiveOf = "", validation = "") + var solid_nocall_strategy: Option[String] = config("solid_nocall_strategy") + + /** Size of the k-mer context to be used for base mismatches */ + @Argument(fullName = "mismatches_context_size", shortName = "mcs", doc = "Size of the k-mer context to be used for base mismatches", required = false, exclusiveOf = "", validation = "") + var mismatches_context_size: Option[Int] = config("mismatches_context_size") + + /** Size of the k-mer context to be used for base insertions and deletions */ + @Argument(fullName = "indels_context_size", shortName = "ics", doc = "Size of the k-mer context to be used for base insertions and deletions", required = false, exclusiveOf = "", validation = "") + var indels_context_size: Option[Int] = config("indels_context_size") + + /** The maximum cycle value permitted for the Cycle covariate */ + @Argument(fullName = "maximum_cycle_value", shortName = "maxCycle", doc = "The maximum cycle value permitted for the Cycle covariate", required = false, exclusiveOf = "", validation = "") + var maximum_cycle_value: Option[Int] = config("maximum_cycle_value") + + /** default quality for the base mismatches covariate */ + @Argument(fullName = "mismatches_default_quality", shortName = "mdq", doc = "default quality for the base mismatches covariate", required = false, exclusiveOf = "", validation 
= "") + var mismatches_default_quality: Option[String] = config("mismatches_default_quality") + + /** default quality for the base insertions covariate */ + @Argument(fullName = "insertions_default_quality", shortName = "idq", doc = "default quality for the base insertions covariate", required = false, exclusiveOf = "", validation = "") + var insertions_default_quality: Option[String] = config("insertions_default_quality") + + /** default quality for the base deletions covariate */ + @Argument(fullName = "deletions_default_quality", shortName = "ddq", doc = "default quality for the base deletions covariate", required = false, exclusiveOf = "", validation = "") + var deletions_default_quality: Option[String] = config("deletions_default_quality") + + /** minimum quality for the bases in the tail of the reads to be considered */ + @Argument(fullName = "low_quality_tail", shortName = "lqt", doc = "minimum quality for the bases in the tail of the reads to be considered", required = false, exclusiveOf = "", validation = "") + var low_quality_tail: Option[String] = config("low_quality_tail") + + /** number of distinct quality scores in the quantized output */ + @Argument(fullName = "quantizing_levels", shortName = "ql", doc = "number of distinct quality scores in the quantized output", required = false, exclusiveOf = "", validation = "") + var quantizing_levels: Option[Int] = config("quantizing_levels") + + /** the binary tag covariate name if using it */ + @Argument(fullName = "binary_tag_name", shortName = "bintag", doc = "the binary tag covariate name if using it", required = false, exclusiveOf = "", validation = "") + var binary_tag_name: Option[String] = config("binary_tag_name") + + /** Sort the rows in the tables of reports */ + @Argument(fullName = "sort_by_all_columns", shortName = "sortAllCols", doc = "Sort the rows in the tables of reports", required = false, exclusiveOf = "", validation = "") + var sort_by_all_columns: Boolean = config("sort_by_all_columns", 
default = false) + + /** If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid. */ + @Argument(fullName = "default_platform", shortName = "dP", doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.", required = false, exclusiveOf = "", validation = "") + var default_platform: Option[String] = config("default_platform") + + /** If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid. */ + @Argument(fullName = "force_platform", shortName = "fP", doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.", required = false, exclusiveOf = "", validation = "") + var force_platform: Option[String] = config("force_platform") + + /** If provided, the read group of EVERY read will be forced to be the provided String. */ + @Argument(fullName = "force_readgroup", shortName = "fRG", doc = "If provided, the read group of EVERY read will be forced to be the provided String.", required = false, exclusiveOf = "", validation = "") + var force_readgroup: Option[String] = config("force_readgroup") + + /** If provided, log all updates to the recalibration tables to the given file. For debugging/testing purposes only */ + @Output(fullName = "recal_table_update_log", shortName = "recal_table_update_log", doc = "If provided, log all updates to the recalibration tables to the given file. 
For debugging/testing purposes only", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) + var recal_table_update_log: File = _ + + /** Max size of the k-mer context to be used for repeat covariates */ + @Argument(fullName = "max_str_unit_length", shortName = "maxstr", doc = "Max size of the k-mer context to be used for repeat covariates", required = false, exclusiveOf = "", validation = "") + var max_str_unit_length: Option[Int] = config("max_str_unit_length") + + /** Max number of repetitions to be used for repeat covariates */ + @Argument(fullName = "max_repeat_length", shortName = "maxrep", doc = "Max number of repetitions to be used for repeat covariates", required = false, exclusiveOf = "", validation = "") + var max_repeat_length: Option[Int] = config("max_repeat_length") + + /** Reduce memory usage in multi-threaded code at the expense of threading efficiency */ + @Argument(fullName = "lowMemoryMode", shortName = "lowMemoryMode", doc = "Reduce memory usage in multi-threaded code at the expense of threading efficiency", required = false, exclusiveOf = "", validation = "") + var lowMemoryMode: Boolean = config("lowMemoryMode", default = false) + + /** BQSR BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets */ + @Argument(fullName = "bqsrBAQGapOpenPenalty", shortName = "bqsrBAQGOP", doc = "BQSR BAQ gap open penalty (Phred Scaled). Default value is 40. 
30 is perhaps better for whole genome call sets", required = false, exclusiveOf = "", validation = "") + var bqsrBAQGapOpenPenalty: Option[Double] = config("bqsrBAQGapOpenPenalty") + + /** Format string for bqsrBAQGapOpenPenalty */ + @Argument(fullName = "bqsrBAQGapOpenPenaltyFormat", shortName = "", doc = "Format string for bqsrBAQGapOpenPenalty", required = false, exclusiveOf = "", validation = "") + var bqsrBAQGapOpenPenaltyFormat: String = "%s" + + /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ + @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) + + /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ + @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false) + + /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ + @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. 
'*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) + + /* Runs at construction time: when the config contains "dbsnp", that file is appended to knownSites. */ if (config.contains("dbsnp")) knownSites :+= new File(config("dbsnp").asString) + + /** Adds the VCF index of every non-null knownSites entry not ending in ".list" to knownSitesIndexes. */ override def beforeGraph() { + super.beforeGraph() + knownSitesIndexes ++= knownSites.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => VcfUtils.getVcfIndexFile(orig)) + } + + /** Appends the arguments declared in this class to the parent's command line; flags use the annotations' short names. */ override def cmdLine = super.cmdLine + + repeat("-knownSites", knownSites, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + + required("-o", out, spaceSeparated = true, escape = true, format = "%s") + + repeat("-cov", covariate, spaceSeparated = true, escape = true, format = "%s") + + conditional(no_standard_covs, "-noStandard", escape = true, format = "%s") + + conditional(run_without_dbsnp_potentially_ruining_quality, "-run_without_dbsnp_potentially_ruining_quality", escape = true, format = "%s") + + optional("-sMode", solid_recal_mode, spaceSeparated = true, escape = true, format = "%s") + + optional("-solid_nocall_strategy", solid_nocall_strategy, spaceSeparated = true, escape = true, format = "%s") + + optional("-mcs", mismatches_context_size, spaceSeparated = true, escape = true, format = "%s") + + optional("-ics", indels_context_size, spaceSeparated = true, escape = true, format = "%s") + + optional("-maxCycle", maximum_cycle_value, spaceSeparated = true, escape = true, format = "%s") + + optional("-mdq", mismatches_default_quality, spaceSeparated = true, escape = true, format = "%s") + + optional("-idq", insertions_default_quality, spaceSeparated = true, escape = true, format = "%s") + + optional("-ddq", deletions_default_quality, spaceSeparated = true, escape = true, format = "%s") + + optional("-lqt", low_quality_tail, spaceSeparated = true, escape = true, format = "%s") + + optional("-ql", 
quantizing_levels, spaceSeparated = true, escape = true, format = "%s") + + optional("-bintag", binary_tag_name, spaceSeparated = true, escape = true, format = "%s") + + conditional(sort_by_all_columns, "-sortAllCols", escape = true, format = "%s") + + optional("-dP", default_platform, spaceSeparated = true, escape = true, format = "%s") + + optional("-fP", force_platform, spaceSeparated = true, escape = true, format = "%s") + + optional("-fRG", force_readgroup, spaceSeparated = true, escape = true, format = "%s") + + optional("-recal_table_update_log", recal_table_update_log, spaceSeparated = true, escape = true, format = "%s") + + optional("-maxstr", max_str_unit_length, spaceSeparated = true, escape = true, format = "%s") + + optional("-maxrep", max_repeat_length, spaceSeparated = true, escape = true, format = "%s") + + conditional(lowMemoryMode, "-lowMemoryMode", escape = true, format = "%s") + + optional("-bqsrBAQGOP", bqsrBAQGapOpenPenalty, spaceSeparated = true, escape = true, format = bqsrBAQGapOpenPenaltyFormat) + + conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + + conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s") +} + +object BaseRecalibrator { + /** Creates a BaseRecalibrator for `root`, appends `input` to input_file and sets `output` as the recalibration table (`out`). */ def apply(root: Configurable, input: File, output: File): BaseRecalibrator = { + val br = new BaseRecalibrator(root) + br.input_file :+= input + br.out = output + br + } +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala new file mode 100644 index 0000000000000000000000000000000000000000..4d712a8407abb8f09b8e6e7fdcceaba8d11d2bb4 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala @@ -0,0 +1,56 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import 
java.io.File + +import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, Output } + +/** Java command-line function whose main class is org.broadinstitute.gatk.tools.CatVariants; takes a reference, one or more input VCFs and an output file. */ class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction { + analysisName = "CatVariants" + javaMainClass = "org.broadinstitute.gatk.tools.CatVariants" + + /** genome reference file <name>.fasta */ + @Input(fullName = "reference", shortName = "R", doc = "genome reference file <name>.fasta", required = true, exclusiveOf = "", validation = "") + var reference: File = _ + + /** Input VCF file/s */ + @Input(fullName = "variant", shortName = "V", doc = "Input VCF file/s", required = true, exclusiveOf = "", validation = "") + var variant: Seq[File] = Nil + + /** output file */ + @Output(fullName = "outputFile", shortName = "out", doc = "output file", required = true, exclusiveOf = "", validation = "") + @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) + var outputFile: File = _ + + /** assumeSorted should be true if the input files are already sorted (based on the position of the variants) */ + @Argument(fullName = "assumeSorted", shortName = "assumeSorted", doc = "assumeSorted should be true if the input files are already sorted (based on the position of the variants)", required = false, exclusiveOf = "", validation = "") + var assumeSorted: Boolean = _ + + /** which type of IndexCreator to use for VCF/BCF indices */ + @Argument(fullName = "variant_index_type", shortName = "", doc = "which type of IndexCreator to use for VCF/BCF indices", required = false, exclusiveOf = "", validation = "") + var variant_index_type: Option[String] = None + + /** the parameter (bin width or features per bin) to pass to the VCF/BCF IndexCreator */ + @Argument(fullName = "variant_index_parameter", shortName = "", doc = "the parameter (bin width or features per bin) to pass to the VCF/BCF IndexCreator", 
required = false, exclusiveOf = "", validation = "") + var variant_index_parameter: Option[Int] = None + + /** Set the minimum level of logging */ + @Argument(fullName = "logging_level", shortName = "l", doc = "Set the minimum level of logging", required = false, exclusiveOf = "", validation = "") + var logging_level: String = _ + + /** Set the logging location */ + @Output(fullName = "log_to_file", shortName = "log", doc = "Set the logging location", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) + var log_to_file: File = _ + + /** Appends the arguments declared in this class to the parent's command line; the two index options have no short name and are passed by their long names. */ override def cmdLine = super.cmdLine + + required("-R", reference, spaceSeparated = true, escape = true, format = "%s") + + repeat("-V", variant, spaceSeparated = true, escape = true, format = "%s") + + required("-out", outputFile, spaceSeparated = true, escape = true, format = "%s") + + conditional(assumeSorted, "-assumeSorted", escape = true, format = "%s") + + optional("--variant_index_type", variant_index_type, spaceSeparated = true, escape = true, format = "%s") + + optional("--variant_index_parameter", variant_index_parameter, spaceSeparated = true, escape = true, format = "%s") + + optional("-l", logging_level, spaceSeparated = true, escape = true, format = "%s") + + optional("-log", log_to_file, spaceSeparated = true, escape = true, format = "%s") +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariantsGather.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariantsGather.scala new file mode 100644 index 0000000000000000000000000000000000000000..27c6cb7cadb59997c7dd0c2039e1f53c74f4f8e7 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariantsGather.scala @@ -0,0 +1,38 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction +import 
org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile +import org.broadinstitute.gatk.queue.function.scattergather.GatherFunction + +/** + * Gather function that concatenates the scattered VCF parts by running CatVariants over them. + * Currently this is the default gather for VCFs. + * One can set a specific gatherer to use by adding @Gather before any output argument. + * For example (used to be part of UG; the class name shown is the upstream GATK Queue one, not this class): + * \@Gather(className = "org.broadinstitute.gatk.queue.extensions.gatk.CatVariantsGatherer") + * \@Output(doc="File to which variants should be written",required=true) + * protected VariantContextWriter writer = null; + */ +class CatVariantsGatherer extends CatVariants(null) with GatherFunction { + this.assumeSorted = true + + analysisName = "Gather_CatVariants" + + /* NOTE(review): a val is evaluated at construction time; confirm originalFunction is already set then, otherwise root is always null. */ override val root = originalFunction match { + case b: BiopetCommandLineFunction => b + case _ => null + } + + /** Copies the reference and VCF index settings from the original GATK function, tags each gather part as "input" followed by its position, and targets the original output before delegating to the parent. */ override def freezeFieldValues() { + /* The cast fails with a ClassCastException when the original function is not a CommandLineGATK. */ val originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK] + + this.reference = originalGATK.reference_sequence + this.variant = this.gatherParts.zipWithIndex map { case (input, index) => new TaggedFile(input, "input" + index) } + this.outputFile = this.originalOutput + /* already set in the constructor body; repeated here */ this.assumeSorted = true + this.variant_index_type = originalGATK.variant_index_type + this.variant_index_parameter = originalGATK.variant_index_parameter + + super.freezeFieldValues() + } +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala new file mode 100644 index 0000000000000000000000000000000000000000..ed0065e589e17000a4e4fb9742d4bca71f6622a1 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala @@ -0,0 +1,89 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import java.io.File + +import nl.lumc.sasc.biopet.core.ScatterGatherableFunction +import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import 
org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile +import org.broadinstitute.gatk.utils.commandline.{ Gather, Input, Output, _ } + +/** Queue command-line function for the GATK walker selected by analysis_type ("CombineGVCFs"); the gVCF written to `out` is gathered with CatVariantsGatherer. */ class CombineGVCFs(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction { + def analysis_type = "CombineGVCFs" + /* Scatter setup: LocusScatterFunction is used, and includeUnmapped is switched off on GATK scatter functions. */ scatterClass = classOf[LocusScatterFunction] + setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false } + + /** One or more specific annotations to recompute. The single value 'none' removes the default annotations */ + @Argument(fullName = "annotation", shortName = "A", doc = "One or more specific annotations to recompute. The single value 'none' removes the default annotations", required = false, exclusiveOf = "", validation = "") + var annotation: List[String] = config("annotation", default = Nil, freeVar = false) + + /** One or more classes/groups of annotations to apply to variant calls */ + @Argument(fullName = "group", shortName = "G", doc = "One or more classes/groups of annotations to apply to variant calls", required = false, exclusiveOf = "", validation = "") + var group: List[String] = config("group", default = Nil) + + /** dbSNP file */ + @Input(fullName = "dbsnp", shortName = "D", doc = "dbSNP file", required = false, exclusiveOf = "", validation = "") + var dbsnp: Option[File] = config("dbsnp") + + /** One or more input gVCF files */ + @Input(fullName = "variant", shortName = "V", doc = "One or more input gVCF files", required = true, exclusiveOf = "", validation = "") + var variant: Seq[File] = Nil + + /** File to which the combined gVCF should be written */ + @Output(fullName = "out", shortName = "o", doc = "File to which the combined gVCF should be written", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[CatVariantsGatherer]) + var out: File = _ + + /** If specified, convert banded gVCFs to all-sites gVCFs */ + @Argument(fullName = "convertToBasePairResolution", shortName = "bpResolution", doc = "If specified, 
convert banded gVCFs to all-sites gVCFs", required = false, exclusiveOf = "", validation = "") + var convertToBasePairResolution: Boolean = config("convertToBasePairResolution", default = false) + + /** If > 0, reference bands will be broken up at genomic positions that are multiples of this number */ + @Argument(fullName = "breakBandsAtMultiplesOf", shortName = "breakBandsAtMultiplesOf", doc = "If > 0, reference bands will be broken up at genomic positions that are multiples of this number", required = false, exclusiveOf = "", validation = "") + var breakBandsAtMultiplesOf: Option[Int] = config("breakBandsAtMultiplesOf") + + /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ + @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) + + /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ + @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false) + + /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ + @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. 
'*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) + + /** Index file belonging to `out`; assigned in beforeGraph and excluded from gathering. */ @Output + @Gather(enabled = false) + private var outputIndex: File = _ + + /** Adds the VCF index of dbsnp (when defined) and of every non-null variant file not ending in ".list" to deps; derives outputIndex from `out` unless `out` is a special file. */ override def beforeGraph() { + super.beforeGraph() + dbsnp.foreach(deps :+= VcfUtils.getVcfIndexFile(_)) + deps ++= variant.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => VcfUtils.getVcfIndexFile(orig)) + if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out)) + outputIndex = VcfUtils.getVcfIndexFile(out) + } + + /** Appends the arguments declared in this class to the parent's command line; flags use the annotations' short names. */ override def cmdLine = super.cmdLine + + repeat("-A", annotation, spaceSeparated = true, escape = true, format = "%s") + + repeat("-G", group, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-D", dbsnp), dbsnp, spaceSeparated = true, escape = true, format = "%s") + + repeat("-V", variant, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + + conditional(convertToBasePairResolution, "-bpResolution", escape = true, format = "%s") + + optional("-breakBandsAtMultiplesOf", breakBandsAtMultiplesOf, spaceSeparated = true, escape = true, format = "%s") + + conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + + conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s") +} + +object CombineGVCFs { + /** Creates a CombineGVCFs for `root` that reads the gVCFs in `input` (assigned to `variant`) and writes the combined gVCF to `output` (`out`). */ def apply(root: Configurable, input: List[File], output: File): CombineGVCFs = { + val cg = new CombineGVCFs(root) + cg.variant = input + cg.out = output + cg + } +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala 
b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala new file mode 100644 index 0000000000000000000000000000000000000000..ba1740d3518e1ea6dbc17a9c60b0d6aa59abc9df --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala @@ -0,0 +1,136 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import java.io.File + +import nl.lumc.sasc.biopet.core.ScatterGatherableFunction +import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile +import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ } +
+/** Queue job wrapper for the GATK `CombineVariants` tool: merges the `variant` VCF files into `out`. */
+class CombineVariants(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
+  def analysis_type = "CombineVariants"
+  scatterClass = classOf[LocusScatterFunction]
+  setupScatterFunction = { case gatkScatter: GATKScatterFunction => gatkScatter.includeUnmapped = false }
+
+  /** Input VCF files; each one is emitted as a (tagged) `-V` argument */
+  @Input(fullName = "variant", shortName = "V", doc = "VCF files to merge together", required = true, exclusiveOf = "", validation = "")
+  var variant: Seq[File] = Nil
+
+  /** Destination of the merged variants (`-o`); gathered with CatVariantsGatherer */
+  @Output(fullName = "out", shortName = "o", doc = "File to which variants should be written", required = false, exclusiveOf = "", validation = "")
+  @Gather(classOf[CatVariantsGatherer])
+  var out: File = _
+
+  /** Merge strategy for genotype records of samples shared across the inputs (`-genotypeMergeOptions`) */
+  @Argument(fullName = "genotypemergeoption", shortName = "genotypeMergeOptions", doc = "Determines how we should merge genotype records for samples shared across the ROD files", required = false, exclusiveOf = "", validation = "")
+  var genotypemergeoption: Option[String] = config("genotypemergeoption")
+
+  /** Handling of same-site records that carry different FILTER fields (`-filteredRecordsMergeType`) */
+  @Argument(fullName = "filteredrecordsmergetype", shortName = "filteredRecordsMergeType", doc = "Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields", required = false, exclusiveOf = "", validation = "")
+  var filteredrecordsmergetype: Option[String] = config("filteredrecordsmergetype")
+
+  /** Handling of same-site records that have different allele types, e.g. SNP vs. indel (`-multipleAllelesMergeType`) */
+  @Argument(fullName = "multipleallelesmergetype", shortName = "multipleAllelesMergeType", doc = "Determines how we should handle records seen at the same site in the VCF, but with different allele types (for example, SNP vs. indel)", required = false, exclusiveOf = "", validation = "")
+  var multipleallelesmergetype: Option[String] = config("multipleallelesmergetype")
+
+  /** Ordered priority list used while merging (`-priority`) */
+  @Argument(fullName = "rod_priority_list", shortName = "priority", doc = "Ordered list specifying priority for merging", required = false, exclusiveOf = "", validation = "")
+  var rod_priority_list: Option[String] = config("rod_priority_list")
+
+  /** When true, adds `-printComplexMerges` (report sites that need complex compatibility merging) */
+  @Argument(fullName = "printComplexMerges", shortName = "printComplexMerges", doc = "Emit interesting sites requiring complex compatibility merging to file", required = false, exclusiveOf = "", validation = "")
+  var printComplexMerges: Boolean = config("printComplexMerges", default = false)
+
+  /** When true, adds `-filteredAreUncalled` (filtered variants count as uncalled) */
+  @Argument(fullName = "filteredAreUncalled", shortName = "filteredAreUncalled", doc = "Treat filtered variants as uncalled", required = false, exclusiveOf = "", validation = "")
+  var filteredAreUncalled: Boolean = config("filteredAreUncalled", default = false)
+
+  /** When true, adds `-minimalVCF` (sites-only output) */
+  @Argument(fullName = "minimalVCF", shortName = "minimalVCF", doc = "Emit a sites-only file", required = false, exclusiveOf = "", validation = "")
+  var minimalVCF: Boolean = config("minimalVCF", default = false)
+
+  /** When true, adds `-env` (drop sites without variation after merging) */
+  @Argument(fullName = "excludeNonVariants", shortName = "env", doc = "Exclude sites where no variation is present after merging", required = false, exclusiveOf = "", validation = "")
+  var excludeNonVariants: Boolean = config("excludeNonVariants", default = false)
+
+  /** Key name for the set attribute (`-setKey`); note that it is read from the config key `set_key` */
+  @Argument(fullName = "setKey", shortName = "setKey", doc = "Key name for the set attribute", required = false, exclusiveOf = "", validation = "")
+  var setKey: Option[String] = config("set_key")
+
+  /** When true, adds `-assumeIdenticalSamples` (inputs share sample sets and have disjoint calls) */
+  @Argument(fullName = "assumeIdenticalSamples", shortName = "assumeIdenticalSamples", doc = "Assume input VCFs have identical sample sets and disjoint calls", required = false, exclusiveOf = "", validation = "")
+  var assumeIdenticalSamples: Boolean = config("assumeIdenticalSamples", default = false)
+
+  /** Minimum number of input files a site must occur in to be kept (`-minN`) */
+  @Argument(fullName = "minimumN", shortName = "minN", doc = "Minimum number of input files the site must be observed in to be included", required = false, exclusiveOf = "", validation = "")
+  var minimumN: Option[Int] = config("minimumN")
+
+  /** When true, adds `-suppressCommandLineHeader` (command line is left out of the header) */
+  @Argument(fullName = "suppressCommandLineHeader", shortName = "suppressCommandLineHeader", doc = "Do not output the command line to the header", required = false, exclusiveOf = "", validation = "")
+  var suppressCommandLineHeader: Boolean = config("suppressCommandLineHeader", default = false)
+
+  /** When true, adds `-mergeInfoWithMaxAC` (INFO is taken from the record with the highest AC) */
+  @Argument(fullName = "mergeInfoWithMaxAC", shortName = "mergeInfoWithMaxAC", doc = "Use the INFO content of the record with the highest AC", required = false, exclusiveOf = "", validation = "")
+  var mergeInfoWithMaxAC: Boolean = config("mergeInfoWithMaxAC", default = false)
+
+  /** When true, adds `-filterRNC` (filter reads whose CIGAR contains N instead of failing) */
+  @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "")
+  var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false)
+
+  /** When true, adds `-filterMBQ` (filter reads with mismatching base/quality counts instead of failing) */
+  @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "")
+  var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false)
+
+  /** When true, adds `-filterNoBases` (filter reads without stored bases instead of failing) */
+  @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "")
+  var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false)
+
+  /** Index file belonging to `out`; assigned in `beforeGraph` and excluded from gathering */
+  @Output
+  @Gather(enabled = false)
+  private var outputIndex: File = _
+
+  /** Adds the index of every non-null, non-`.list` input to `deps` and, for a regular `out`, sets `outputIndex`. */
+  override def beforeGraph(): Unit = {
+    super.beforeGraph()
+    val indexedInputs = variant.filter(vcf => vcf != null && !vcf.getName.endsWith(".list"))
+    deps ++= indexedInputs.map(vcf => VcfUtils.getVcfIndexFile(vcf))
+    val writesRegularFile = out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out)
+    if (writesRegularFile) outputIndex = VcfUtils.getVcfIndexFile(out)
+  }
+
+  /** Appends the CombineVariants-specific arguments to the command line built by the parent trait. */
+  override def cmdLine = super.cmdLine + Seq(
+    repeat("-V", variant, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s"),
+    optional("-o", out, spaceSeparated = true, escape = true, format = "%s"),
+    optional("-genotypeMergeOptions", genotypemergeoption, spaceSeparated = true, escape = true, format = "%s"),
+    optional("-filteredRecordsMergeType", filteredrecordsmergetype, spaceSeparated = true, escape = true, format = "%s"),
+    optional("-multipleAllelesMergeType", multipleallelesmergetype, spaceSeparated = true, escape = true, format = "%s"),
+    optional("-priority", rod_priority_list, spaceSeparated = true, escape = true, format = "%s"),
+    conditional(printComplexMerges, "-printComplexMerges", escape = true, format = "%s"),
+    conditional(filteredAreUncalled, "-filteredAreUncalled", escape = true, format = "%s"),
+    conditional(minimalVCF, "-minimalVCF", escape = true, format = "%s"),
+    conditional(excludeNonVariants, "-env", escape = true, format = "%s"),
+    optional("-setKey", setKey, spaceSeparated = true, escape = true, format = "%s"),
+    conditional(assumeIdenticalSamples, "-assumeIdenticalSamples", escape = true, format = "%s"),
+    optional("-minN", minimumN, spaceSeparated = true, escape = true, format = "%s"),
+    conditional(suppressCommandLineHeader, "-suppressCommandLineHeader", escape = true, format = "%s"),
+    conditional(mergeInfoWithMaxAC, "-mergeInfoWithMaxAC", escape = true, format = "%s"),
+    conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s"),
+    conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s"),
+    conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s")
+  ).mkString
+}
+
+/** Companion holding a convenience factory for CombineVariants jobs. */
+object CombineVariants {
+  /** Creates a job configured from `root` that reads the `input` files and writes to `output`. */
+  def apply(root: Configurable, input: List[File], output: File): CombineVariants = {
+    val combiner = new CombineVariants(root)
+    combiner.variant = input
+    combiner.out = output
+    combiner
+  }
+}
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CommandLineGATK.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CommandLineGATK.scala new file mode 100644 index 0000000000000000000000000000000000000000..7fbba210ce42d99573c3dfd127a45ab9346f438a --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CommandLineGATK.scala @@ -0,0 +1,406 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import java.io.File + +import nl.lumc.sasc.biopet.core.{ BiopetJavaCommandLineFunction, Reference, Version } +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile +import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, Output } +import org.broadinstitute.gatk.utils.interval.{ IntervalMergingRule, IntervalSetRule } + +trait CommandLineGATK extends BiopetJavaCommandLineFunction with Reference with Version { + analysisName = analysis_type + javaMainClass = "org.broadinstitute.gatk.engine.CommandLineGATK" + jarFile = config("gatk_jar") + + /** Name of the tool to run */ + def analysis_type: String + + /** Input file containing sequence data (BAM or CRAM) */ + @Input(fullName = "input_file", shortName = "I", doc = "Input file containing sequence data (BAM or CRAM)", required = false, exclusiveOf = "", validation = "") + var input_file: Seq[File] = Nil + + /** Dependencies on any indexes of input_file
*/ + @Input(fullName = "input_fileIndexes", shortName = "", doc = "Dependencies on any indexes of input_file", required = false, exclusiveOf = "", validation = "") + private var input_fileIndexes: Seq[File] = Nil + + /** Emit a log entry (level INFO) containing the full list of sequence data files to be included in the analysis (including files inside .bam.list or .cram.list files). */ + @Argument(fullName = "showFullBamList", shortName = "", doc = "Emit a log entry (level INFO) containing the full list of sequence data files to be included in the analysis (including files inside .bam.list or .cram.list files).", required = false, exclusiveOf = "", validation = "") + var showFullBamList: Boolean = config("showFullBamList", default = false) + + /** Number of reads per SAM file to buffer in memory */ + @Argument(fullName = "read_buffer_size", shortName = "rbs", doc = "Number of reads per SAM file to buffer in memory", required = false, exclusiveOf = "", validation = "") + var read_buffer_size: Option[Int] = config("read_buffer_size") + + /** Run reporting mode */ + @Argument(fullName = "phone_home", shortName = "et", doc = "Run reporting mode", required = false, exclusiveOf = "", validation = "") + var phone_home: Option[String] = config("phone_home") + + /** GATK key file required to run with -et NO_ET */ + @Input(fullName = "gatk_key", shortName = "K", doc = "GATK key file required to run with -et NO_ET", required = false, exclusiveOf = "", validation = "") + var gatk_key: Option[File] = config("gatk_key") + + /** Tag to identify this GATK run as part of a group of runs */ + @Argument(fullName = "tag", shortName = "tag", doc = "Tag to identify this GATK run as part of a group of runs", required = false, exclusiveOf = "", validation = "") + var tag: Option[String] = config("tag") + + /** Filters to apply to reads before analysis */ + @Argument(fullName = "read_filter", shortName = "rf", doc = "Filters to apply to reads before analysis", required = false, exclusiveOf 
= "", validation = "") + var read_filter: List[String] = config("read_filter", default = Nil) + + /** Read filters to disable */ + @Argument(fullName = "disable_read_filter", shortName = "drf", doc = "Read filters to disable", required = false, exclusiveOf = "", validation = "") + var disable_read_filter: List[String] = config("disable_read_filter", default = Nil) + + /** One or more genomic intervals over which to operate */ + @Input(fullName = "intervals", shortName = "L", doc = "One or more genomic intervals over which to operate", required = false, exclusiveOf = "intervalsString", validation = "") + var intervals: List[File] = config("intervals", default = Nil) + + /** One or more genomic intervals over which to operate */ + @Argument(fullName = "intervalsString", shortName = "L", doc = "One or more genomic intervals over which to operate", required = false, exclusiveOf = "intervals", validation = "") + var intervalsString: List[String] = config("intervalsString", default = Nil) + + /** One or more genomic intervals to exclude from processing */ + @Input(fullName = "excludeIntervals", shortName = "XL", doc = "One or more genomic intervals to exclude from processing", required = false, exclusiveOf = "excludeIntervalsString", validation = "") + var excludeIntervals: List[File] = config("excludeIntervals", default = Nil) + + /** One or more genomic intervals to exclude from processing */ + @Argument(fullName = "excludeIntervalsString", shortName = "XL", doc = "One or more genomic intervals to exclude from processing", required = false, exclusiveOf = "excludeIntervals", validation = "") + var excludeIntervalsString: List[String] = config("excludeIntervalsString", default = Nil) + + /** Set merging approach to use for combining interval inputs */ + @Argument(fullName = "interval_set_rule", shortName = "isr", doc = "Set merging approach to use for combining interval inputs", required = false, exclusiveOf = "", validation = "") + var interval_set_rule: 
Option[IntervalSetRule] = None + + /** Interval merging rule for abutting intervals */ + @Argument(fullName = "interval_merging", shortName = "im", doc = "Interval merging rule for abutting intervals", required = false, exclusiveOf = "", validation = "") + var interval_merging: Option[IntervalMergingRule] = None + + /** Amount of padding (in bp) to add to each interval */ + @Argument(fullName = "interval_padding", shortName = "ip", doc = "Amount of padding (in bp) to add to each interval", required = false, exclusiveOf = "", validation = "") + var interval_padding: Option[Int] = config("interval_padding") + + /** Reference sequence file */ + @Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false, exclusiveOf = "", validation = "") + var reference_sequence: File = _ + + /** Use a non-deterministic random seed */ + @Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Use a non-deterministic random seed", required = false, exclusiveOf = "", validation = "") + var nonDeterministicRandomSeed: Boolean = config("nonDeterministicRandomSeed", default = false) + + /** Completely eliminates randomized dithering from rank sum tests. 
*/ + @Argument(fullName = "disableDithering", shortName = "", doc = "Completely eliminates randomized dithering from rank sum tests.", required = false, exclusiveOf = "", validation = "") + var disableDithering: Boolean = config("disableDithering", default = false) + + /** Stop execution cleanly as soon as maxRuntime has been reached */ + @Argument(fullName = "maxRuntime", shortName = "maxRuntime", doc = "Stop execution cleanly as soon as maxRuntime has been reached", required = false, exclusiveOf = "", validation = "") + var maxRuntime: Option[Long] = config("maxRuntime") + + /** Unit of time used by maxRuntime */ + @Argument(fullName = "maxRuntimeUnits", shortName = "maxRuntimeUnits", doc = "Unit of time used by maxRuntime", required = false, exclusiveOf = "", validation = "") + var maxRuntimeUnits: Option[String] = config("maxRuntimeUnits") + + /** Type of read downsampling to employ at a given locus */ + @Argument(fullName = "downsampling_type", shortName = "dt", doc = "Type of read downsampling to employ at a given locus", required = false, exclusiveOf = "", validation = "") + var downsampling_type: Option[String] = config("downsampling_type") + + /** Fraction of reads to downsample to */ + @Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction of reads to downsample to", required = false, exclusiveOf = "", validation = "") + var downsample_to_fraction: Option[Double] = config("downsample_to_fraction") + + /** Format string for downsample_to_fraction */ + @Argument(fullName = "downsample_to_fractionFormat", shortName = "", doc = "Format string for downsample_to_fraction", required = false, exclusiveOf = "", validation = "") + var downsample_to_fractionFormat: String = "%s" + + /** Target coverage threshold for downsampling to coverage */ + @Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Target coverage threshold for downsampling to coverage", required = false, exclusiveOf = "", validation = "") + var 
downsample_to_coverage: Option[Int] = config("downsample_to_coverage") + + /** Type of BAQ calculation to apply in the engine */ + @Argument(fullName = "baq", shortName = "baq", doc = "Type of BAQ calculation to apply in the engine", required = false, exclusiveOf = "", validation = "") + var baq: Option[String] = config("baq") + + /** BAQ gap open penalty */ + @Argument(fullName = "baqGapOpenPenalty", shortName = "baqGOP", doc = "BAQ gap open penalty", required = false, exclusiveOf = "", validation = "") + var baqGapOpenPenalty: Option[Double] = config("baqGapOpenPenalty") + + /** Format string for baqGapOpenPenalty */ + @Argument(fullName = "baqGapOpenPenaltyFormat", shortName = "", doc = "Format string for baqGapOpenPenalty", required = false, exclusiveOf = "", validation = "") + var baqGapOpenPenaltyFormat: String = "%s" + + /** Reduce NDN elements in CIGAR string */ + @Argument(fullName = "refactor_NDN_cigar_string", shortName = "fixNDN", doc = "Reduce NDN elements in CIGAR string", required = false, exclusiveOf = "", validation = "") + var refactor_NDN_cigar_string: Boolean = config("refactor_NDN_cigar_string", default = false) + + /** Fix mis-encoded base quality scores */ + @Argument(fullName = "fix_misencoded_quality_scores", shortName = "fixMisencodedQuals", doc = "Fix mis-encoded base quality scores", required = false, exclusiveOf = "", validation = "") + var fix_misencoded_quality_scores: Boolean = config("fix_misencoded_quality_scores", default = false) + + /** Ignore warnings about base quality score encoding */ + @Argument(fullName = "allow_potentially_misencoded_quality_scores", shortName = "allowPotentiallyMisencodedQuals", doc = "Ignore warnings about base quality score encoding", required = false, exclusiveOf = "", validation = "") + var allow_potentially_misencoded_quality_scores: Boolean = config("allow_potentially_misencoded_quality_scores", default = false) + + /** Use the base quality scores from the OQ tag */ + @Argument(fullName =
"useOriginalQualities", shortName = "OQ", doc = "Use the base quality scores from the OQ tag", required = false, exclusiveOf = "", validation = "") + var useOriginalQualities: Boolean = config("useOriginalQualities", default = false) + + /** Assign a default base quality */ + @Argument(fullName = "defaultBaseQualities", shortName = "DBQ", doc = "Assign a default base quality", required = false, exclusiveOf = "", validation = "") + var defaultBaseQualities: Option[Int] = config("defaultBaseQualities") + + /** Write GATK runtime performance log to this file */ + @Output(fullName = "performanceLog", shortName = "PF", doc = "Write GATK runtime performance log to this file", required = false, exclusiveOf = "", validation = "") + var performanceLog: Option[File] = None + + /** Input covariates table file for on-the-fly base quality score recalibration (`-BQSR`); starts as None rather than null so that Option operations on it are safe before a caller assigns it */ + @Input(fullName = "BQSR", shortName = "BQSR", doc = "Input covariates table file for on-the-fly base quality score recalibration", required = false, exclusiveOf = "", validation = "") + var BQSR: Option[File] = None + + /** Quantize quality scores to a given number of levels (with -BQSR) */ + @Argument(fullName = "quantize_quals", shortName = "qq", doc = "Quantize quality scores to a given number of levels (with -BQSR)", required = false, exclusiveOf = "", validation = "") + var quantize_quals: Option[Int] = config("quantize_quals") + + /** Use static quantized quality scores to a given number of levels (with -BQSR) */ + @Argument(fullName = "static_quantized_quals", shortName = "SQQ", doc = "Use static quantized quality scores to a given number of levels (with -BQSR)", required = false, exclusiveOf = "quantize_quals", validation = "") + var static_quantized_quals: List[Int] = config("static_quantized_quals", default = Nil) + + /** Round quals down to nearest quantized qual */ + @Argument(fullName = "round_down_quantized", shortName = "RDQ", doc = "Round quals down to nearest quantized qual", required = false, exclusiveOf
= "quantize_quals", validation = "") + var round_down_quantized: Boolean = config("round_down_quantized", default = false) + + /** Disable printing of base insertion and deletion tags (with -BQSR) */ + @Argument(fullName = "disable_indel_quals", shortName = "DIQ", doc = "Disable printing of base insertion and deletion tags (with -BQSR)", required = false, exclusiveOf = "", validation = "") + var disable_indel_quals: Boolean = config("disable_indel_quals", default = false) + + /** Emit the OQ tag with the original base qualities (with -BQSR) */ + @Argument(fullName = "emit_original_quals", shortName = "EOQ", doc = "Emit the OQ tag with the original base qualities (with -BQSR)", required = false, exclusiveOf = "", validation = "") + var emit_original_quals: Boolean = config("emit_original_quals", default = false) + + /** Don't recalibrate bases with quality scores less than this threshold (with -BQSR) */ + @Argument(fullName = "preserve_qscores_less_than", shortName = "preserveQ", doc = "Don't recalibrate bases with quality scores less than this threshold (with -BQSR)", required = false, exclusiveOf = "", validation = "") + var preserve_qscores_less_than: Option[Int] = config("preserve_qscores_less_than") + + /** Global Qscore Bayesian prior to use for BQSR */ + @Argument(fullName = "globalQScorePrior", shortName = "globalQScorePrior", doc = "Global Qscore Bayesian prior to use for BQSR", required = false, exclusiveOf = "", validation = "") + var globalQScorePrior: Option[Double] = config("globalQScorePrior") + + /** Format string for globalQScorePrior */ + @Argument(fullName = "globalQScorePriorFormat", shortName = "", doc = "Format string for globalQScorePrior", required = false, exclusiveOf = "", validation = "") + var globalQScorePriorFormat: String = "%s" + + /** How strict should we be with validation */ + @Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false, exclusiveOf = "", 
validation = "") + var validation_strictness: Option[String] = config("validation_strictness") + + /** Remove program records from the SAM header */ + @Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Remove program records from the SAM header", required = false, exclusiveOf = "", validation = "") + var remove_program_records: Boolean = config("remove_program_records", default = false) + + /** Keep program records in the SAM header */ + @Argument(fullName = "keep_program_records", shortName = "kpr", doc = "Keep program records in the SAM header", required = false, exclusiveOf = "", validation = "") + var keep_program_records: Boolean = config("keep_program_records", default = false) + + /** Rename sample IDs on-the-fly at runtime using the provided mapping file */ + @Input(fullName = "sample_rename_mapping_file", shortName = "sample_rename_mapping_file", doc = "Rename sample IDs on-the-fly at runtime using the provided mapping file", required = false, exclusiveOf = "", validation = "") + var sample_rename_mapping_file: Option[File] = config("sample_rename_mapping_file") + + /** Enable unsafe operations: nothing will be checked at runtime */ + @Argument(fullName = "unsafe", shortName = "U", doc = "Enable unsafe operations: nothing will be checked at runtime", required = false, exclusiveOf = "", validation = "") + var unsafe: Option[String] = config("unsafe") + + /** Disable both auto-generation of index files and index file locking */ + @Argument(fullName = "disable_auto_index_creation_and_locking_when_reading_rods", shortName = "disable_auto_index_creation_and_locking_when_reading_rods", doc = "Disable both auto-generation of index files and index file locking", required = false, exclusiveOf = "", validation = "") + var disable_auto_index_creation_and_locking_when_reading_rods: Boolean = config("disable_auto_index_creation_and_locking_when_reading_rods", default = false) + + /** Don't output the usual VCF header tag with the command line. 
FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests. */ + @Argument(fullName = "no_cmdline_in_header", shortName = "no_cmdline_in_header", doc = "Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.", required = false, exclusiveOf = "", validation = "") + var no_cmdline_in_header: Boolean = config("no_cmdline_in_header", default = false) + + /** Just output sites without genotypes (i.e. only the first 8 columns of the VCF) */ + @Argument(fullName = "sites_only", shortName = "sites_only", doc = "Just output sites without genotypes (i.e. only the first 8 columns of the VCF)", required = false, exclusiveOf = "", validation = "") + var sites_only: Boolean = config("sites_only", default = false) + + /** Always output all the records in VCF FORMAT fields, even if some are missing */ + @Argument(fullName = "never_trim_vcf_format_field", shortName = "writeFullFormat", doc = "Always output all the records in VCF FORMAT fields, even if some are missing", required = false, exclusiveOf = "", validation = "") + var never_trim_vcf_format_field: Boolean = config("never_trim_vcf_format_field", default = false) + + /** Force BCF output, regardless of the file's extension */ + @Argument(fullName = "bcf", shortName = "bcf", doc = "Force BCF output, regardless of the file's extension", required = false, exclusiveOf = "", validation = "") + var bcf: Boolean = config("bcf", default = false) + + /** Compression level to use for writing BAM files (0 - 9, higher is more compressed) */ + @Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files (0 - 9, higher is more compressed)", required = false, exclusiveOf = "", validation = "") + var bam_compression: Option[Int] = config("bam_compression") + + /** If provided, output BAM/CRAM files will be simplified to include just key reads for downstream 
variation discovery analyses (removing duplicates, PF-, non-primary reads), as well stripping all extended tags from the kept reads except the read group identifier */ + @Argument(fullName = "simplifyBAM", shortName = "simplifyBAM", doc = "If provided, output BAM/CRAM files will be simplified to include just key reads for downstream variation discovery analyses (removing duplicates, PF-, non-primary reads), as well stripping all extended tags from the kept reads except the read group identifier", required = false, exclusiveOf = "", validation = "") + var simplifyBAM: Boolean = config("simplifyBAM", default = false) + + /** Turn off on-the-fly creation of indices for output BAM/CRAM files. */ + @Argument(fullName = "disable_bam_indexing", shortName = "", doc = "Turn off on-the-fly creation of indices for output BAM/CRAM files.", required = false, exclusiveOf = "", validation = "") + var disable_bam_indexing: Boolean = config("disable_bam_indexing", default = false) + + /** Enable on-the-fly creation of md5s for output BAM files. 
*/ + @Argument(fullName = "generate_md5", shortName = "", doc = "Enable on-the-fly creation of md5s for output BAM files.", required = false, exclusiveOf = "", validation = "") + var generate_md5: Boolean = config("generate_md5", default = false) + + /** Number of data threads to allocate to this analysis */ + @Argument(fullName = "num_threads", shortName = "nt", doc = "Number of data threads to allocate to this analysis", required = false, exclusiveOf = "", validation = "") + var num_threads: Option[Int] = None + + /** Number of CPU threads to allocate per data thread */ + @Argument(fullName = "num_cpu_threads_per_data_thread", shortName = "nct", doc = "Number of CPU threads to allocate per data thread", required = false, exclusiveOf = "", validation = "") + var num_cpu_threads_per_data_thread: Option[Int] = None + + /** Number of given threads to allocate to BAM IO */ + @Argument(fullName = "num_io_threads", shortName = "nit", doc = "Number of given threads to allocate to BAM IO", required = false, exclusiveOf = "", validation = "") + var num_io_threads: Option[Int] = None + + /** Enable threading efficiency monitoring */ + @Argument(fullName = "monitorThreadEfficiency", shortName = "mte", doc = "Enable threading efficiency monitoring", required = false, exclusiveOf = "", validation = "") + var monitorThreadEfficiency: Boolean = config("monitorThreadEfficiency", default = false) + + /** When using IO threads, total number of BAM file handles to keep open simultaneously */ + @Argument(fullName = "num_bam_file_handles", shortName = "bfh", doc = "When using IO threads, total number of BAM file handles to keep open simultaneously", required = false, exclusiveOf = "", validation = "") + var num_bam_file_handles: Option[Int] = None + + /** Exclude read groups based on tags */ + @Input(fullName = "read_group_black_list", shortName = "rgbl", doc = "Exclude read groups based on tags", required = false, exclusiveOf = "", validation = "") + var read_group_black_list: 
List[File] = config("read_group_black_list", default = Nil) + + /** Pedigree files for samples */ + @Argument(fullName = "pedigree", shortName = "ped", doc = "Pedigree files for samples", required = false, exclusiveOf = "", validation = "") + var pedigree: List[File] = config("pedigree", default = Nil) + + /** Pedigree string for samples */ + @Argument(fullName = "pedigreeString", shortName = "pedString", doc = "Pedigree string for samples", required = false, exclusiveOf = "", validation = "") + var pedigreeString: List[String] = config("pedigreeString", default = Nil) + + /** Validation strictness for pedigree information */ + @Argument(fullName = "pedigreeValidationType", shortName = "pedValidationType", doc = "Validation strictness for pedigree information", required = false, exclusiveOf = "", validation = "") + var pedigreeValidationType: Option[String] = config("pedigreeValidationType") + + /** Allow interval processing with an unsupported BAM/CRAM */ + @Argument(fullName = "allow_intervals_with_unindexed_bam", shortName = "", doc = "Allow interval processing with an unsupported BAM/CRAM", required = false, exclusiveOf = "", validation = "") + var allow_intervals_with_unindexed_bam: Boolean = config("allow_intervals_with_unindexed_bam", default = false) + + /** Write a BCF copy of the output VCF */ + @Argument(fullName = "generateShadowBCF", shortName = "generateShadowBCF", doc = "Write a BCF copy of the output VCF", required = false, exclusiveOf = "", validation = "") + var generateShadowBCF: Boolean = config("generateShadowBCF", default = false) + + /** Type of IndexCreator to use for VCF/BCF indices */ + @Argument(fullName = "variant_index_type", shortName = "variant_index_type", doc = "Type of IndexCreator to use for VCF/BCF indices", required = false, exclusiveOf = "", validation = "") + var variant_index_type: Option[String] = config("variant_index_type") + + /** Parameter to pass to the VCF/BCF IndexCreator */ + @Argument(fullName = 
"variant_index_parameter", shortName = "variant_index_parameter", doc = "Parameter to pass to the VCF/BCF IndexCreator", required = false, exclusiveOf = "", validation = "") + var variant_index_parameter: Option[Int] = config("variant_index_parameter") + + /** Reference window stop */ + @Argument(fullName = "reference_window_stop", shortName = "ref_win_stop", doc = "Reference window stop", required = false, exclusiveOf = "", validation = "") + var reference_window_stop: Option[Int] = config("reference_window_stop") + + /** Set the minimum level of logging */ + @Argument(fullName = "logging_level", shortName = "l", doc = "Set the minimum level of logging", required = false, exclusiveOf = "", validation = "") + var logging_level: Option[String] = config("logging_level") + + /** Set the logging location */ + @Output(fullName = "log_to_file", shortName = "log", doc = "Set the logging location", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) + var log_to_file: File = _ + + def versionRegex = """(.*)""".r + override def versionExitcode = List(0, 1) + def versionCommand = executable + " -jar " + jarFile + " -version" + + override def defaultCoreMemory = 4.0 + override def faiRequired = true + override def dictRequired = true + + override def beforeGraph() { + super.beforeGraph() + if (interval_set_rule.isEmpty) { + val v: Option[String] = config("interval_set_rule") + interval_set_rule = v.map(IntervalSetRule.valueOf(_)) + } + if (interval_merging.isEmpty) { + val v: Option[String] = config("interval_merging") + interval_merging = v.map(IntervalMergingRule.valueOf(_)) + } + if (reference_sequence == null) reference_sequence = referenceFasta() + input_fileIndexes ++= input_file.filter(orig => orig != null && orig.getName.endsWith(".bam")).flatMap(orig => Array(new File(orig.getPath.stripSuffix(".bam") + ".bai"))) + if (num_threads.isDefined) nCoresRequest = num_threads + 
if (num_cpu_threads_per_data_thread.isDefined) nCoresRequest = Some(nCoresRequest.getOrElse(1) * num_cpu_threads_per_data_thread.getOrElse(1)) + } + + override def cmdLine = super.cmdLine + + required("-T", analysis_type, spaceSeparated = true, escape = true, format = "%s") + + repeat("-I", input_file, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + + conditional(showFullBamList, "--showFullBamList", escape = true, format = "%s") + + optional("-rbs", read_buffer_size, spaceSeparated = true, escape = true, format = "%s") + + optional("-et", phone_home, spaceSeparated = true, escape = true, format = "%s") + + optional("-K", gatk_key, spaceSeparated = true, escape = true, format = "%s") + + optional("-tag", tag, spaceSeparated = true, escape = true, format = "%s") + + repeat("-rf", read_filter, spaceSeparated = true, escape = true, format = "%s") + + repeat("-drf", disable_read_filter, spaceSeparated = true, escape = true, format = "%s") + + repeat("-L", intervals, spaceSeparated = true, escape = true, format = "%s") + + repeat("-L", intervalsString, spaceSeparated = true, escape = true, format = "%s") + + repeat("-XL", excludeIntervals, spaceSeparated = true, escape = true, format = "%s") + + repeat("-XL", excludeIntervalsString, spaceSeparated = true, escape = true, format = "%s") + + optional("-isr", interval_set_rule, spaceSeparated = true, escape = true, format = "%s") + + optional("-im", interval_merging, spaceSeparated = true, escape = true, format = "%s") + + optional("-ip", interval_padding, spaceSeparated = true, escape = true, format = "%s") + + optional("-R", reference_sequence, spaceSeparated = true, escape = true, format = "%s") + + conditional(nonDeterministicRandomSeed, "-ndrs", escape = true, format = "%s") + + conditional(disableDithering, "--disableDithering", escape = true, format = "%s") + + optional("-maxRuntime", maxRuntime, spaceSeparated = true, escape = true, format = "%s") + + 
optional("-maxRuntimeUnits", maxRuntimeUnits, spaceSeparated = true, escape = true, format = "%s") + + optional("-dt", downsampling_type, spaceSeparated = true, escape = true, format = "%s") + + optional("-dfrac", downsample_to_fraction, spaceSeparated = true, escape = true, format = downsample_to_fractionFormat) + + optional("-dcov", downsample_to_coverage, spaceSeparated = true, escape = true, format = "%s") + + optional("-baq", baq, spaceSeparated = true, escape = true, format = "%s") + + optional("-baqGOP", baqGapOpenPenalty, spaceSeparated = true, escape = true, format = baqGapOpenPenaltyFormat) + + conditional(refactor_NDN_cigar_string, "-fixNDN", escape = true, format = "%s") + + conditional(fix_misencoded_quality_scores, "-fixMisencodedQuals", escape = true, format = "%s") + + conditional(allow_potentially_misencoded_quality_scores, "-allowPotentiallyMisencodedQuals", escape = true, format = "%s") + + conditional(useOriginalQualities, "-OQ", escape = true, format = "%s") + + optional("-DBQ", defaultBaseQualities, spaceSeparated = true, escape = true, format = "%s") + + optional("-PF", performanceLog, spaceSeparated = true, escape = true, format = "%s") + + optional("-BQSR", BQSR, spaceSeparated = true, escape = true, format = "%s") + + optional("-qq", quantize_quals, spaceSeparated = true, escape = true, format = "%s") + + repeat("-SQQ", static_quantized_quals, spaceSeparated = true, escape = true, format = "%s") + + conditional(round_down_quantized, "-RDQ", escape = true, format = "%s") + + conditional(disable_indel_quals, "-DIQ", escape = true, format = "%s") + + conditional(emit_original_quals, "-EOQ", escape = true, format = "%s") + + optional("-preserveQ", preserve_qscores_less_than, spaceSeparated = true, escape = true, format = "%s") + + optional("-globalQScorePrior", globalQScorePrior, spaceSeparated = true, escape = true, format = globalQScorePriorFormat) + + optional("-S", validation_strictness, spaceSeparated = true, escape = true, format = "%s") 
+ + conditional(remove_program_records, "-rpr", escape = true, format = "%s") + + conditional(keep_program_records, "-kpr", escape = true, format = "%s") + + optional("-sample_rename_mapping_file", sample_rename_mapping_file, spaceSeparated = true, escape = true, format = "%s") + + optional("-U", unsafe, spaceSeparated = true, escape = true, format = "%s") + + conditional(disable_auto_index_creation_and_locking_when_reading_rods, "-disable_auto_index_creation_and_locking_when_reading_rods", escape = true, format = "%s") + + conditional(no_cmdline_in_header, "-no_cmdline_in_header", escape = true, format = "%s") + + conditional(sites_only, "-sites_only", escape = true, format = "%s") + + conditional(never_trim_vcf_format_field, "-writeFullFormat", escape = true, format = "%s") + + conditional(bcf, "-bcf", escape = true, format = "%s") + + optional("-compress", bam_compression, spaceSeparated = true, escape = true, format = "%s") + + conditional(simplifyBAM, "-simplifyBAM", escape = true, format = "%s") + + conditional(disable_bam_indexing, "--disable_bam_indexing", escape = true, format = "%s") + + conditional(generate_md5, "--generate_md5", escape = true, format = "%s") + + optional("-nt", num_threads, spaceSeparated = true, escape = true, format = "%s") + + optional("-nct", num_cpu_threads_per_data_thread, spaceSeparated = true, escape = true, format = "%s") + + optional("-nit", num_io_threads, spaceSeparated = true, escape = true, format = "%s") + + conditional(monitorThreadEfficiency, "-mte", escape = true, format = "%s") + + optional("-bfh", num_bam_file_handles, spaceSeparated = true, escape = true, format = "%s") + + repeat("-rgbl", read_group_black_list, spaceSeparated = true, escape = true, format = "%s") + + repeat("-ped", pedigree, spaceSeparated = true, escape = true, format = "%s") + + repeat("-pedString", pedigreeString, spaceSeparated = true, escape = true, format = "%s") + + optional("-pedValidationType", pedigreeValidationType, spaceSeparated = 
true, escape = true, format = "%s") + + conditional(allow_intervals_with_unindexed_bam, "--allow_intervals_with_unindexed_bam", escape = true, format = "%s") + + conditional(generateShadowBCF, "-generateShadowBCF", escape = true, format = "%s") + + optional("-variant_index_type", variant_index_type, spaceSeparated = true, escape = true, format = "%s") + + optional("-variant_index_parameter", variant_index_parameter, spaceSeparated = true, escape = true, format = "%s") + + optional("-ref_win_stop", reference_window_stop, spaceSeparated = true, escape = true, format = "%s") + + optional("-l", logging_level, spaceSeparated = true, escape = true, format = "%s") + + optional("-log", log_to_file, spaceSeparated = true, escape = true, format = "%s") +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ContigScatterFunction.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ContigScatterFunction.scala new file mode 100644 index 0000000000000000000000000000000000000000..abfc807c26b7a88623b4b3ec649b010cb3d5d526 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ContigScatterFunction.scala @@ -0,0 +1,24 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import org.broadinstitute.gatk.queue.function.InProcessFunction +import org.broadinstitute.gatk.utils.interval.IntervalUtils + +import scala.collection.JavaConversions._ + +/** + * Splits intervals by contig instead of evenly. 
+ */ +class ContigScatterFunction extends GATKScatterFunction with InProcessFunction { + + override def scatterCount = if (intervalFilesExist) super.scatterCount min this.maxIntervals else super.scatterCount + + protected override def maxIntervals = { + GATKScatterFunction.getGATKIntervals(this.originalGATK).contigs.size + } + + def run() { + val gi = GATKScatterFunction.getGATKIntervals(this.originalGATK) + IntervalUtils.scatterContigIntervals(gi.samFileHeader, gi.locs, this.scatterOutputFiles) + } +} + diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkScatterFunction.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkScatterFunction.scala new file mode 100644 index 0000000000000000000000000000000000000000..d49b4d34372dfebdd5eafb6469d643e3067a8d4b --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkScatterFunction.scala @@ -0,0 +1,95 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import java.io.File + +import org.broadinstitute.gatk.queue.extensions.gatk.GATKIntervals +import org.broadinstitute.gatk.queue.function.scattergather.{ CloneFunction, ScatterFunction } +import org.broadinstitute.gatk.utils.commandline.Output +import org.broadinstitute.gatk.utils.interval.IntervalUtils +import org.broadinstitute.gatk.utils.io.IOUtils + +trait GATKScatterFunction extends ScatterFunction { + /* The runtime field to set for specifying intervals. 
*/ + private final val intervalsField = "intervals" + private final val intervalsStringField = "intervalsString" + private final val excludeIntervalsField = "excludeIntervals" + private final val excludeIntervalsStringField = "excludeIntervalsString" + private final val intervalsSetRuleField = "interval_set_rule" + private final val intervalMergingField = "interval_merging" + private final val intervalPaddingField = "interval_padding" + + @Output(doc = "Scatter function outputs") + var scatterOutputFiles: Seq[File] = Nil + + /** The original GATK function. */ + protected var originalGATK: CommandLineGATK = _ + + /** Whether the last scatter job should also include any unmapped reads. */ + var includeUnmapped: Boolean = _ + + override def init() { + this.originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK] + // If intervals have been specified check if unmapped is included + if (this.originalGATK.intervals.size + this.originalGATK.intervalsString.size > 0) + this.includeUnmapped = this.originalGATK.intervalsString.exists(interval => IntervalUtils.isUnmapped(interval)) + } + + override def isScatterGatherable = { + this.originalGATK.reference_sequence != null + } + + override def initCloneInputs(cloneFunction: CloneFunction, index: Int) { + cloneFunction.setFieldValue(this.intervalsField, Seq(new File("scatter.intervals"))) + if (index == this.scatterCount && this.includeUnmapped) + cloneFunction.setFieldValue(this.intervalsStringField, Seq("unmapped")) + else + cloneFunction.setFieldValue(this.intervalsStringField, Seq.empty[String]) + + cloneFunction.setFieldValue(this.intervalsSetRuleField, null) + cloneFunction.setFieldValue(this.intervalMergingField, null) + cloneFunction.setFieldValue(this.intervalPaddingField, None) + cloneFunction.setFieldValue(this.excludeIntervalsField, Seq.empty[File]) + cloneFunction.setFieldValue(this.excludeIntervalsStringField, Seq.empty[String]) + } + + override def bindCloneInputs(cloneFunction: CloneFunction, index: 
Int) { + val scatterPart = cloneFunction.getFieldValue(this.intervalsField) + .asInstanceOf[Seq[File]] + .map(file => IOUtils.absolute(cloneFunction.commandDirectory, file)) + cloneFunction.setFieldValue(this.intervalsField, scatterPart) + this.scatterOutputFiles ++= scatterPart + } + + /** + * @return true if all interval files exist. + */ + protected def intervalFilesExist = { + !(this.originalGATK.intervals ++ this.originalGATK.excludeIntervals).exists(interval => !interval.exists()) + } + + /** + * @return the maximum number of intervals or this.scatterCount if the maximum can't be determined ahead of time. + */ + protected def maxIntervals: Int +} + +object GATKScatterFunction { + var gatkIntervalsCache = Seq.empty[GATKIntervals] + + def getGATKIntervals(originalFunction: CommandLineGATK) = { + val gatkIntervals = new GATKIntervals( + originalFunction.reference_sequence, + originalFunction.intervals.toSeq, + originalFunction.intervalsString.toSeq, + originalFunction.interval_set_rule.getOrElse(null), + originalFunction.interval_merging.getOrElse(null), + originalFunction.interval_padding, + originalFunction.excludeIntervals.toSeq, originalFunction.excludeIntervalsString.toSeq) + gatkIntervalsCache.find(_ == gatkIntervals) match { + case Some(existingGatkIntervals) => existingGatkIntervals + case None => + gatkIntervalsCache :+= gatkIntervals + gatkIntervals + } + } +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala new file mode 100644 index 0000000000000000000000000000000000000000..aea609fb98b311748e98cd044cf0b454a88bfe94 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala @@ -0,0 +1,121 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import java.io.File + +import nl.lumc.sasc.biopet.core.ScatterGatherableFunction +import 
nl.lumc.sasc.biopet.core.summary.Summarizable +import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile +import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ } +import org.broadinstitute.gatk.utils.report.{ GATKReport, GATKReportTable } + +class GenotypeConcordance(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction with Summarizable { + analysisName = "GenotypeConcordance" + val analysis_type = "GenotypeConcordance" + scatterClass = classOf[LocusScatterFunction] + setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false } + + /** The variants and genotypes to evaluate */ + @Input(fullName = "eval", shortName = "eval", doc = "The variants and genotypes to evaluate", required = true, exclusiveOf = "", validation = "") + var eval: File = _ + + /** The variants and genotypes to compare against */ + @Input(fullName = "comp", shortName = "comp", doc = "The variants and genotypes to compare against", required = true, exclusiveOf = "", validation = "") + var comp: File = _ + + /** Filters will be ignored */ + @Argument(fullName = "ignoreFilters", shortName = "", doc = "Filters will be ignored", required = false, exclusiveOf = "", validation = "") + var ignoreFilters: Boolean = config("ignoreFilters", default = false) + + /** One or more criteria to use to set EVAL genotypes to no-call. These genotype-level filters are only applied to the EVAL rod. */ + @Argument(fullName = "genotypeFilterExpressionEval", shortName = "gfe", doc = "One or more criteria to use to set EVAL genotypes to no-call. These genotype-level filters are only applied to the EVAL rod.", required = false, exclusiveOf = "", validation = "") + var genotypeFilterExpressionEval: List[String] = config("genotypeFilterExpressionEval", default = Nil) + + /** One or more criteria to use to set COMP genotypes to no-call. 
These genotype-level filters are only applied to the COMP rod. */ + @Argument(fullName = "genotypeFilterExpressionComp", shortName = "gfc", doc = "One or more criteria to use to set COMP genotypes to no-call. These genotype-level filters are only applied to the COMP rod.", required = false, exclusiveOf = "", validation = "") + var genotypeFilterExpressionComp: Seq[String] = config("genotypeFilterExpressionComp", default = Nil) + + /** Molten rather than tabular output */ + @Argument(fullName = "moltenize", shortName = "moltenize", doc = "Molten rather than tabular output", required = false, exclusiveOf = "", validation = "") + var moltenize: Boolean = config("moltenize", default = true) + + /** File to output the discordant sites and genotypes. */ + @Output(fullName = "printInterestingSites", shortName = "sites", doc = "File to output the discordant sites and genotypes.", required = false, exclusiveOf = "", validation = "") + var printInterestingSites: Option[File] = None + + /** An output file created by the walker. Will overwrite contents if file exists */ + @Output(fullName = "out", shortName = "o", doc = "An output file created by the walker. 
Will overwrite contents if file exists", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) + var out: File = _ + + /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ + @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) + + /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ + @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false) + + /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ + @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. 
'*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) + + def summaryFiles = Map("output" -> out) + + def summaryStats = { + val report = new GATKReport(out) + val compProportions = report.getTable("GenotypeConcordance_CompProportions") + val counts = report.getTable("GenotypeConcordance_Counts") + val evalProportions = report.getTable("GenotypeConcordance_EvalProportions") + val genotypeSummary = report.getTable("GenotypeConcordance_Summary") + val siteSummary = report.getTable("SiteConcordance_Summary") + + val samples = for (i <- 0 until genotypeSummary.getNumRows) yield genotypeSummary.get(i, "Sample").toString + + def getMap(table: GATKReportTable, column: String) = samples.distinct.map(sample => sample -> { + (for (i <- 0 until table.getNumRows if table.get(i, "Sample") == sample) yield s"${table.get(i, "Eval_Genotype")}__${table.get(i, "Comp_Genotype")}" -> table.get(i, column)).toMap + }).toMap + + Map( + "compProportions" -> getMap(compProportions, "Proportion"), + "counts" -> getMap(counts, "Count"), + "evalProportions" -> getMap(evalProportions, "Proportion"), + "genotypeSummary" -> samples.distinct.map(sample => { + val i = samples.indexOf(sample) + sample -> Map( + "Non-Reference_Discrepancy" -> genotypeSummary.get(i, "Non-Reference_Discrepancy"), + "Non-Reference_Sensitivity" -> genotypeSummary.get(i, "Non-Reference_Sensitivity"), + "Overall_Genotype_Concordance" -> genotypeSummary.get(i, "Overall_Genotype_Concordance") + ) + }).toMap, + "siteSummary" -> Map( + "ALLELES_MATCH" -> siteSummary.get(0, "ALLELES_MATCH"), + "EVAL_SUPERSET_TRUTH" -> siteSummary.get(0, "EVAL_SUPERSET_TRUTH"), + "EVAL_SUBSET_TRUTH" -> siteSummary.get(0, "EVAL_SUBSET_TRUTH"), + "ALLELES_DO_NOT_MATCH" -> siteSummary.get(0, "ALLELES_DO_NOT_MATCH"), + "EVAL_ONLY" -> siteSummary.get(0, "EVAL_ONLY"), + "TRUTH_ONLY" -> 
siteSummary.get(0, "TRUTH_ONLY") + ) + ) + } + + override def beforeGraph() { + super.beforeGraph() + if (eval != null) deps :+= VcfUtils.getVcfIndexFile(eval) + if (comp != null) deps :+= VcfUtils.getVcfIndexFile(comp) + } + + override def cmdLine = super.cmdLine + + required(TaggedFile.formatCommandLineParameter("-eval", eval), eval, spaceSeparated = true, escape = true, format = "%s") + + required(TaggedFile.formatCommandLineParameter("-comp", comp), comp, spaceSeparated = true, escape = true, format = "%s") + + conditional(ignoreFilters, "--ignoreFilters", escape = true, format = "%s") + + repeat("-gfe", genotypeFilterExpressionEval, spaceSeparated = true, escape = true, format = "%s") + + repeat("-gfc", genotypeFilterExpressionComp, spaceSeparated = true, escape = true, format = "%s") + + conditional(moltenize, "-moltenize", escape = true, format = "%s") + + optional("-sites", printInterestingSites, spaceSeparated = true, escape = true, format = "%s") + + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + + conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + + conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s") +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala new file mode 100644 index 0000000000000000000000000000000000000000..650340d63c1be0ba9195609a616ddddb4abee8ef --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala @@ -0,0 +1,149 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import java.io.File + +import nl.lumc.sasc.biopet.core.ScatterGatherableFunction +import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import 
org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile +import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ } + +class GenotypeGVCFs(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction { + def analysis_type = "GenotypeGVCFs" + scatterClass = classOf[LocusScatterFunction] + setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false } + + /** One or more input gVCF files */ + @Input(fullName = "variant", shortName = "V", doc = "One or more input gVCF files", required = true, exclusiveOf = "", validation = "") + var variant: Seq[File] = Nil + + /** File to which variants should be written */ + @Output(fullName = "out", shortName = "o", doc = "File to which variants should be written", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[CatVariantsGatherer]) + var out: File = _ + + /** Include loci found to be non-variant after genotyping */ + @Argument(fullName = "includeNonVariantSites", shortName = "allSites", doc = "Include loci found to be non-variant after genotyping", required = false, exclusiveOf = "", validation = "") + var includeNonVariantSites: Boolean = config("includeNonVariantSites", default = false) + + /** Assume duplicate samples are present and uniquify all names with '.variant' and file number index */ + @Argument(fullName = "uniquifySamples", shortName = "uniquifySamples", doc = "Assume duplicate samples are present and uniquify all names with '.variant' and file number index", required = false, exclusiveOf = "", validation = "") + var uniquifySamples: Boolean = config("uniquifySamples", default = false) + + /** If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site */ + @Argument(fullName = "annotateNDA", shortName = "nda", doc = "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily 
genotyped) at a given site", required = false, exclusiveOf = "", validation = "") + var annotateNDA: Boolean = config("annotateNDA", default = false) + + /** Heterozygosity value used to compute prior likelihoods for any locus */ + @Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false, exclusiveOf = "", validation = "") + var heterozygosity: Option[Double] = config("heterozygosity") + + /** Format string for heterozygosity */ + @Argument(fullName = "heterozygosityFormat", shortName = "", doc = "Format string for heterozygosity", required = false, exclusiveOf = "", validation = "") + var heterozygosityFormat: String = "%s" + + /** Heterozygosity for indel calling */ + @Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false, exclusiveOf = "", validation = "") + var indel_heterozygosity: Option[Double] = config("indel_heterozygosity") + + /** Format string for indel_heterozygosity */ + @Argument(fullName = "indel_heterozygosityFormat", shortName = "", doc = "Format string for indel_heterozygosity", required = false, exclusiveOf = "", validation = "") + var indel_heterozygosityFormat: String = "%s" + + /** The minimum phred-scaled confidence threshold at which variants should be called */ + @Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be called", required = false, exclusiveOf = "", validation = "") + var standard_min_confidence_threshold_for_calling: Option[Double] = config("stand_call_conf") + + /** Format string for standard_min_confidence_threshold_for_calling */ + @Argument(fullName = "standard_min_confidence_threshold_for_callingFormat", shortName = "", doc = "Format string for standard_min_confidence_threshold_for_calling", required = false, 
exclusiveOf = "", validation = "") + var standard_min_confidence_threshold_for_callingFormat: String = "%s" + + /** The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold) */ + @Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)", required = false, exclusiveOf = "", validation = "") + var standard_min_confidence_threshold_for_emitting: Option[Double] = config("stand_emit_conf") + + /** Format string for standard_min_confidence_threshold_for_emitting */ + @Argument(fullName = "standard_min_confidence_threshold_for_emittingFormat", shortName = "", doc = "Format string for standard_min_confidence_threshold_for_emitting", required = false, exclusiveOf = "", validation = "") + var standard_min_confidence_threshold_for_emittingFormat: String = "%s" + + /** Maximum number of alternate alleles to genotype */ + @Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false, exclusiveOf = "", validation = "") + var max_alternate_alleles: Option[Int] = config("max_alternate_alleles") + + /** Input prior for calls */ + @Argument(fullName = "input_prior", shortName = "inputPrior", doc = "Input prior for calls", required = false, exclusiveOf = "", validation = "") + var input_prior: List[Double] = config("input_prior", default = Nil) + + /** Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy). */ + @Argument(fullName = "sample_ploidy", shortName = "ploidy", doc = "Ploidy (number of chromosomes) per sample. 
For pooled data, set to (Number of samples in each pool * Sample Ploidy).", required = false, exclusiveOf = "", validation = "") + var sample_ploidy: Option[Int] = config("sample_ploidy") + + /** One or more specific annotations to recompute. The single value 'none' removes the default annotations */ + @Argument(fullName = "annotation", shortName = "A", doc = "One or more specific annotations to recompute. The single value 'none' removes the default annotations", required = false, exclusiveOf = "", validation = "") + var annotation: List[String] = config("annotation", default = Nil, freeVar = false) + + /** One or more classes/groups of annotations to apply to variant calls */ + @Argument(fullName = "group", shortName = "G", doc = "One or more classes/groups of annotations to apply to variant calls", required = false, exclusiveOf = "", validation = "") + var group: List[String] = config("group", default = Nil) + + /** dbSNP file */ + @Input(fullName = "dbsnp", shortName = "D", doc = "dbSNP file", required = false, exclusiveOf = "", validation = "") + var dbsnp: Option[File] = config("dbsnp") + + /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ + @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) + + /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ + @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default 
= false) + + /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ + @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) + + @Output + @Gather(enabled = false) + private var outputIndex: File = _ + + @Output + @Gather(enabled = false) + private var dbsnpIndex: File = _ + + override def beforeGraph() { + super.beforeGraph() + deps ++= variant.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => VcfUtils.getVcfIndexFile(orig)) + if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out)) + outputIndex = VcfUtils.getVcfIndexFile(out) + dbsnp.foreach(x => dbsnpIndex = VcfUtils.getVcfIndexFile(x)) + } + + override def cmdLine = super.cmdLine + + repeat("-V", variant, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + + conditional(includeNonVariantSites, "-allSites", escape = true, format = "%s") + + conditional(uniquifySamples, "-uniquifySamples", escape = true, format = "%s") + + conditional(annotateNDA, "-nda", escape = true, format = "%s") + + optional("-hets", heterozygosity, spaceSeparated = true, escape = true, format = heterozygosityFormat) + + optional("-indelHeterozygosity", indel_heterozygosity, spaceSeparated = true, escape = true, format = indel_heterozygosityFormat) + + optional("-stand_call_conf", standard_min_confidence_threshold_for_calling, spaceSeparated = true, escape = true, format = standard_min_confidence_threshold_for_callingFormat) + + optional("-stand_emit_conf", standard_min_confidence_threshold_for_emitting, 
spaceSeparated = true, escape = true, format = standard_min_confidence_threshold_for_emittingFormat) + + optional("-maxAltAlleles", max_alternate_alleles, spaceSeparated = true, escape = true, format = "%s") + + repeat("-inputPrior", input_prior, spaceSeparated = true, escape = true, format = "%s") + + optional("-ploidy", sample_ploidy, spaceSeparated = true, escape = true, format = "%s") + + repeat("-A", annotation, spaceSeparated = true, escape = true, format = "%s") + + repeat("-G", group, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-D", dbsnp), dbsnp, spaceSeparated = true, escape = true, format = "%s") + + conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + + conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s") +} + +object GenotypeGVCFs { + def apply(root: Configurable, gvcfFiles: List[File], output: File): GenotypeGVCFs = { + val gg = new GenotypeGVCFs(root) + gg.variant = gvcfFiles + gg.out = output + gg + } +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala new file mode 100644 index 0000000000000000000000000000000000000000..9eac2ba9e3d4974e7b479addcb3d0f1dadf5ef56 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala @@ -0,0 +1,519 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import java.io.File + +import nl.lumc.sasc.biopet.core.ScatterGatherableFunction +import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile +import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, Output } + +class HaplotypeCaller(val 
root: Configurable) extends CommandLineGATK with ScatterGatherableFunction { + def analysis_type = "HaplotypeCaller" + scatterClass = classOf[LocusScatterFunction] + setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false } + + /** File to which variants should be written */ + @Output(fullName = "out", shortName = "o", doc = "File to which variants should be written", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[CatVariantsGatherer]) + var out: File = _ + + /** What likelihood calculation engine to use to calculate the relative likelihood of reads vs haplotypes */ + @Argument(fullName = "likelihoodCalculationEngine", shortName = "likelihoodEngine", doc = "What likelihood calculation engine to use to calculate the relative likelihood of reads vs haplotypes", required = false, exclusiveOf = "", validation = "") + var likelihoodCalculationEngine: String = _ + + /** How to solve heterogeneous kmer situations using the fast method */ + @Argument(fullName = "heterogeneousKmerSizeResolution", shortName = "hksr", doc = "How to solve heterogeneous kmer situations using the fast method", required = false, exclusiveOf = "", validation = "") + var heterogeneousKmerSizeResolution: String = _ + + /** dbSNP file */ + @Input(fullName = "dbsnp", shortName = "D", doc = "dbSNP file", required = false, exclusiveOf = "", validation = "") + var dbsnp: Option[File] = config("dbsnp") + + /** If specified, we will not trim down the active region from the full region (active + extension) to just the active interval for genotyping */ + @Argument(fullName = "dontTrimActiveRegions", shortName = "dontTrimActiveRegions", doc = "If specified, we will not trim down the active region from the full region (active + extension) to just the active interval for genotyping", required = false, exclusiveOf = "", validation = "") + var dontTrimActiveRegions: Boolean = config("dontTrimActiveRegions", default = false) + + /** the maximum extent 
into the full active region extension that we're willing to go in genotyping our events for discovery */ + @Argument(fullName = "maxDiscARExtension", shortName = "maxDiscARExtension", doc = "the maximum extent into the full active region extension that we're willing to go in genotyping our events for discovery", required = false, exclusiveOf = "", validation = "") + var maxDiscARExtension: Option[Int] = config("maxDiscARExtension") + + /** the maximum extent into the full active region extension that we're willing to go in genotyping our events for GGA mode */ + @Argument(fullName = "maxGGAARExtension", shortName = "maxGGAARExtension", doc = "the maximum extent into the full active region extension that we're willing to go in genotyping our events for GGA mode", required = false, exclusiveOf = "", validation = "") + var maxGGAARExtension: Option[Int] = config("maxGGAARExtension") + + /** Include at least this many bases around an event for calling indels */ + @Argument(fullName = "paddingAroundIndels", shortName = "paddingAroundIndels", doc = "Include at least this many bases around an event for calling indels", required = false, exclusiveOf = "", validation = "") + var paddingAroundIndels: Option[Int] = config("paddingAroundIndels") + + /** Include at least this many bases around an event for calling snps */ + @Argument(fullName = "paddingAroundSNPs", shortName = "paddingAroundSNPs", doc = "Include at least this many bases around an event for calling snps", required = false, exclusiveOf = "", validation = "") + var paddingAroundSNPs: Option[Int] = config("paddingAroundSNPs") + + /** Comparison VCF file */ + @Input(fullName = "comp", shortName = "comp", doc = "Comparison VCF file", required = false, exclusiveOf = "", validation = "") + var comp: List[File] = config("comp", default = Nil) + + /** One or more specific annotations to apply to variant calls */ + @Argument(fullName = "annotation", shortName = "A", doc = "One or more specific annotations to apply to 
variant calls", required = false, exclusiveOf = "", validation = "") + var annotation: List[String] = config("annotation", default = Nil, freeVar = false) + + /** One or more specific annotations to exclude */ + @Argument(fullName = "excludeAnnotation", shortName = "XA", doc = "One or more specific annotations to exclude", required = false, exclusiveOf = "", validation = "") + var excludeAnnotation: List[String] = config("excludeAnnotation", default = Nil, freeVar = false) + + /** One or more classes/groups of annotations to apply to variant calls */ + @Argument(fullName = "group", shortName = "G", doc = "One or more classes/groups of annotations to apply to variant calls", required = false, exclusiveOf = "", validation = "") + var group: List[String] = config("group", default = Nil, freeVar = false) + + /** Print out very verbose debug information about each triggering active region */ + @Argument(fullName = "debug", shortName = "debug", doc = "Print out very verbose debug information about each triggering active region", required = false, exclusiveOf = "", validation = "") + var debug: Boolean = config("debug", default = false, freeVar = false) + + /** Use the contamination-filtered read maps for the purposes of annotating variants */ + @Argument(fullName = "useFilteredReadsForAnnotations", shortName = "useFilteredReadsForAnnotations", doc = "Use the contamination-filtered read maps for the purposes of annotating variants", required = false, exclusiveOf = "", validation = "") + var useFilteredReadsForAnnotations: Boolean = config("useFilteredReadsForAnnotations", default = false) + + /** Mode for emitting reference confidence scores */ + @Argument(fullName = "emitRefConfidence", shortName = "ERC", doc = "Mode for emitting reference confidence scores", required = false, exclusiveOf = "", validation = "") + var emitRefConfidence: String = _ + + /** File to which assembled haplotypes should be written */ + @Output(fullName = "bamOutput", shortName = "bamout", doc = 
"File to which assembled haplotypes should be written", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[BamGatherFunction]) + var bamOutput: File = _ + + /** Automatically generated md5 for bamOutput */ + @Output(fullName = "bamOutputMD5", shortName = "", doc = "Automatically generated md5 for bamOutput", required = false, exclusiveOf = "", validation = "") + @Gather(enabled = false) + private var bamOutputMD5: File = _ + + /** Which haplotypes should be written to the BAM */ + @Argument(fullName = "bamWriterType", shortName = "bamWriterType", doc = "Which haplotypes should be written to the BAM", required = false, exclusiveOf = "", validation = "") + var bamWriterType: String = _ + + /** Don't skip calculations in ActiveRegions with no variants */ + @Argument(fullName = "disableOptimizations", shortName = "disableOptimizations", doc = "Don't skip calculations in ActiveRegions with no variants", required = false, exclusiveOf = "", validation = "") + var disableOptimizations: Boolean = config("disableOptimizations", default = false) + + /** If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site */ + @Argument(fullName = "annotateNDA", shortName = "nda", doc = "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site", required = false, exclusiveOf = "", validation = "") + var annotateNDA: Boolean = config("annotateNDA", default = false) + + /** Heterozygosity value used to compute prior likelihoods for any locus */ + @Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false, exclusiveOf = "", validation = "") + var heterozygosity: Option[Double] = config("heterozygosity") + + /** Format string for heterozygosity */ + @Argument(fullName = "heterozygosityFormat", shortName = "", 
doc = "Format string for heterozygosity", required = false, exclusiveOf = "", validation = "") + var heterozygosityFormat: String = "%s" + + /** Heterozygosity for indel calling */ + @Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false, exclusiveOf = "", validation = "") + var indel_heterozygosity: Option[Double] = config("indel_heterozygosity") + + /** Format string for indel_heterozygosity */ + @Argument(fullName = "indel_heterozygosityFormat", shortName = "", doc = "Format string for indel_heterozygosity", required = false, exclusiveOf = "", validation = "") + var indel_heterozygosityFormat: String = "%s" + + /** The minimum phred-scaled confidence threshold at which variants should be called */ + @Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be called", required = false, exclusiveOf = "", validation = "") + var standard_min_confidence_threshold_for_calling: Option[Double] = config("stand_call_conf") + + /** Format string for standard_min_confidence_threshold_for_calling */ + @Argument(fullName = "standard_min_confidence_threshold_for_callingFormat", shortName = "", doc = "Format string for standard_min_confidence_threshold_for_calling", required = false, exclusiveOf = "", validation = "") + var standard_min_confidence_threshold_for_callingFormat: String = "%s" + + /** The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold) */ + @Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)", required = false, exclusiveOf = "", validation = "") + var 
standard_min_confidence_threshold_for_emitting: Option[Double] = config("stand_emit_conf") + + /** Format string for standard_min_confidence_threshold_for_emitting */ + @Argument(fullName = "standard_min_confidence_threshold_for_emittingFormat", shortName = "", doc = "Format string for standard_min_confidence_threshold_for_emitting", required = false, exclusiveOf = "", validation = "") + var standard_min_confidence_threshold_for_emittingFormat: String = "%s" + + /** Maximum number of alternate alleles to genotype */ + @Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false, exclusiveOf = "", validation = "") + var max_alternate_alleles: Option[Int] = config("max_alternate_alleles") + + /** Input prior for calls */ + @Argument(fullName = "input_prior", shortName = "inputPrior", doc = "Input prior for calls", required = false, exclusiveOf = "", validation = "") + var input_prior: List[Double] = config("input_prior", default = Nil) + + /** Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy). */ + @Argument(fullName = "sample_ploidy", shortName = "ploidy", doc = "Ploidy (number of chromosomes) per sample. 
For pooled data, set to (Number of samples in each pool * Sample Ploidy).", required = false, exclusiveOf = "", validation = "") + var sample_ploidy: Option[Int] = config("sample_ploidy") + + /** Specifies how to determine the alternate alleles to use for genotyping */ + @Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false, exclusiveOf = "", validation = "") + var genotyping_mode: Option[String] = config("genotyping_mode") + + /** The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES */ + @Input(fullName = "alleles", shortName = "alleles", doc = "The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES", required = false, exclusiveOf = "", validation = "") + var alleles: Option[File] = None + + /** Fraction of contamination in sequencing data (for all samples) to aggressively remove */ + @Argument(fullName = "contamination_fraction_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false, exclusiveOf = "", validation = "") + var contamination_fraction_to_filter: Option[Double] = config("contamination_fraction_to_filter") + + /** Format string for contamination_fraction_to_filter */ + @Argument(fullName = "contamination_fraction_to_filterFormat", shortName = "", doc = "Format string for contamination_fraction_to_filter", required = false, exclusiveOf = "", validation = "") + var contamination_fraction_to_filterFormat: String = "%s" + + /** Tab-separated File containing fraction of contamination in sequencing data (per sample) to aggressively remove. Format should be \"<SampleID><TAB><Contamination>\" (Contamination is double) per line; No header. 
*/ + @Input(fullName = "contamination_fraction_per_sample_file", shortName = "contaminationFile", doc = "Tab-separated File containing fraction of contamination in sequencing data (per sample) to aggressively remove. Format should be \"<SampleID><TAB><Contamination>\" (Contamination is double) per line; No header.", required = false, exclusiveOf = "", validation = "") + var contamination_fraction_per_sample_file: Option[File] = config("contamination_fraction_per_sample_file") + + /** Non-reference probability calculation model to employ */ + @Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false, exclusiveOf = "", validation = "") + var p_nonref_model: Option[String] = config("p_nonref_model") + + /** x */ + @Argument(fullName = "exactcallslog", shortName = "logExactCalls", doc = "x", required = false, exclusiveOf = "", validation = "") + var exactcallslog: Option[File] = config("exactcallslog") + + /** Specifies which type of calls we should output */ + @Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false, exclusiveOf = "", validation = "") + var output_mode: Option[String] = config("output_mode") + + /** Annotate all sites with PLs */ + @Argument(fullName = "allSitePLs", shortName = "allSitePLs", doc = "Annotate all sites with PLs", required = false, exclusiveOf = "", validation = "") + var allSitePLs: Boolean = config("allSitePLs", default = false) + + /** Flat gap continuation penalty for use in the Pair HMM */ + @Argument(fullName = "gcpHMM", shortName = "gcpHMM", doc = "Flat gap continuation penalty for use in the Pair HMM", required = false, exclusiveOf = "", validation = "") + var gcpHMM: Option[Int] = config("gcpHMM") + + /** The PairHMM implementation to use for genotype likelihood calculations */ + @Argument(fullName = "pair_hmm_implementation", shortName = "pairHMM", doc = "The PairHMM 
implementation to use for genotype likelihood calculations", required = false, exclusiveOf = "", validation = "") + var pair_hmm_implementation: org.broadinstitute.gatk.utils.pairhmm.PairHMM.HMM_IMPLEMENTATION = _ + + /** The PairHMM machine-dependent sub-implementation to use for genotype likelihood calculations */ + @Argument(fullName = "pair_hmm_sub_implementation", shortName = "pairHMMSub", doc = "The PairHMM machine-dependent sub-implementation to use for genotype likelihood calculations", required = false, exclusiveOf = "", validation = "") + var pair_hmm_sub_implementation: org.broadinstitute.gatk.utils.pairhmm.PairHMM.HMM_SUB_IMPLEMENTATION = _ + + /** Load the vector logless PairHMM library each time a GATK run is initiated in the test suite */ + @Argument(fullName = "always_load_vector_logless_PairHMM_lib", shortName = "alwaysloadVectorHMM", doc = "Load the vector logless PairHMM library each time a GATK run is initiated in the test suite", required = false, exclusiveOf = "", validation = "") + var always_load_vector_logless_PairHMM_lib: Boolean = config("always_load_vector_logless_PairHMM_lib", default = false) + + /** The global assumed mismapping rate for reads */ + @Argument(fullName = "phredScaledGlobalReadMismappingRate", shortName = "globalMAPQ", doc = "The global assumed mismapping rate for reads", required = false, exclusiveOf = "", validation = "") + var phredScaledGlobalReadMismappingRate: Option[Int] = config("phredScaledGlobalReadMismappingRate") + + /** Disable the use of the FPGA HMM implementation */ + @Argument(fullName = "noFpga", shortName = "noFpga", doc = "Disable the use of the FPGA HMM implementation", required = false, exclusiveOf = "", validation = "") + var noFpga: Boolean = config("noFpga", default = false) + + /** Name of single sample to use from a multi-sample bam */ + @Argument(fullName = "sample_name", shortName = "sn", doc = "Name of single sample to use from a multi-sample bam", required = false, exclusiveOf = "", 
validation = "") + var sample_name: Option[String] = config("sample_name") + + /** Kmer size to use in the read threading assembler */ + @Argument(fullName = "kmerSize", shortName = "kmerSize", doc = "Kmer size to use in the read threading assembler", required = false, exclusiveOf = "", validation = "") + var kmerSize: List[Int] = config("kmerSize", default = Nil) + + /** Disable iterating over kmer sizes when graph cycles are detected */ + @Argument(fullName = "dontIncreaseKmerSizesForCycles", shortName = "dontIncreaseKmerSizesForCycles", doc = "Disable iterating over kmer sizes when graph cycles are detected", required = false, exclusiveOf = "", validation = "") + var dontIncreaseKmerSizesForCycles: Boolean = config("dontIncreaseKmerSizesForCycles", default = false) + + /** Allow graphs that have non-unique kmers in the reference */ + @Argument(fullName = "allowNonUniqueKmersInRef", shortName = "allowNonUniqueKmersInRef", doc = "Allow graphs that have non-unique kmers in the reference", required = false, exclusiveOf = "", validation = "") + var allowNonUniqueKmersInRef: Boolean = config("allowNonUniqueKmersInRef", default = false) + + /** Number of samples that must pass the minPruning threshold */ + @Argument(fullName = "numPruningSamples", shortName = "numPruningSamples", doc = "Number of samples that must pass the minPruning threshold", required = false, exclusiveOf = "", validation = "") + var numPruningSamples: Option[Int] = config("numPruningSamples") + + /** Disable dangling head and tail recovery */ + @Argument(fullName = "doNotRecoverDanglingBranches", shortName = "doNotRecoverDanglingBranches", doc = "Disable dangling head and tail recovery", required = false, exclusiveOf = "", validation = "") + var doNotRecoverDanglingBranches: Boolean = config("doNotRecoverDanglingBranches", default = false) + + /** Minimum length of a dangling branch to attempt recovery */ + @Argument(fullName = "minDanglingBranchLength", shortName = "minDanglingBranchLength", doc = 
"Minimum length of a dangling branch to attempt recovery", required = false, exclusiveOf = "", validation = "") + var minDanglingBranchLength: Option[Int] = config("minDanglingBranchLength") + + /** 1000G consensus mode */ + @Argument(fullName = "consensus", shortName = "consensus", doc = "1000G consensus mode", required = false, exclusiveOf = "", validation = "") + var consensus: Boolean = config("consensus", default = false) + + /** Maximum number of haplotypes to consider for your population */ + @Argument(fullName = "maxNumHaplotypesInPopulation", shortName = "maxNumHaplotypesInPopulation", doc = "Maximum number of haplotypes to consider for your population", required = false, exclusiveOf = "", validation = "") + var maxNumHaplotypesInPopulation: Option[Int] = config("maxNumHaplotypesInPopulation") + + /** Use an exploratory algorithm to error correct the kmers used during assembly */ + @Argument(fullName = "errorCorrectKmers", shortName = "errorCorrectKmers", doc = "Use an exploratory algorithm to error correct the kmers used during assembly", required = false, exclusiveOf = "", validation = "") + var errorCorrectKmers: Boolean = _ + + /** Minimum support to not prune paths in the graph */ + @Argument(fullName = "minPruning", shortName = "minPruning", doc = "Minimum support to not prune paths in the graph", required = false, exclusiveOf = "", validation = "") + var minPruning: Option[Int] = config("minPruning") + + /** Write DOT formatted graph files out of the assembler for only this graph size */ + @Argument(fullName = "debugGraphTransformations", shortName = "debugGraphTransformations", doc = "Write DOT formatted graph files out of the assembler for only this graph size", required = false, exclusiveOf = "", validation = "") + var debugGraphTransformations: Boolean = config("debugGraphTransformations", default = false) + + /** Allow cycles in the kmer graphs to generate paths with multiple copies of the path sequenece rather than just the shortest paths */ + 
@Argument(fullName = "allowCyclesInKmerGraphToGeneratePaths", shortName = "allowCyclesInKmerGraphToGeneratePaths", doc = "Allow cycles in the kmer graphs to generate paths with multiple copies of the path sequenece rather than just the shortest paths", required = false, exclusiveOf = "", validation = "") + var allowCyclesInKmerGraphToGeneratePaths: Boolean = config("allowCyclesInKmerGraphToGeneratePaths", default = false) + + /** Write debug assembly graph information to this file */ + @Output(fullName = "graphOutput", shortName = "graph", doc = "Write debug assembly graph information to this file", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) + var graphOutput: File = _ + + /** Use an exploratory algorithm to error correct the kmers used during assembly */ + @Argument(fullName = "kmerLengthForReadErrorCorrection", shortName = "kmerLengthForReadErrorCorrection", doc = "Use an exploratory algorithm to error correct the kmers used during assembly", required = false, exclusiveOf = "", validation = "") + var kmerLengthForReadErrorCorrection: Option[Int] = config("kmerLengthForReadErrorCorrection") + + /** A k-mer must be seen at least these times for it considered to be solid */ + @Argument(fullName = "minObservationsForKmerToBeSolid", shortName = "minObservationsForKmerToBeSolid", doc = "A k-mer must be seen at least these times for it considered to be solid", required = false, exclusiveOf = "", validation = "") + var minObservationsForKmerToBeSolid: Option[Int] = config("minObservationsForKmerToBeSolid") + + /** GQ thresholds for reference confidence bands */ + @Argument(fullName = "GVCFGQBands", shortName = "GQB", doc = "GQ thresholds for reference confidence bands", required = false, exclusiveOf = "", validation = "") + var GVCFGQBands: List[Int] = config("GVCFGQBands", default = Nil) + + /** The size of an indel to check for in the reference model */ + 
@Argument(fullName = "indelSizeToEliminateInRefModel", shortName = "ERCIS", doc = "The size of an indel to check for in the reference model", required = false, exclusiveOf = "", validation = "") + var indelSizeToEliminateInRefModel: Option[Int] = config("indelSizeToEliminateInRefModel") + + /** Minimum base quality required to consider a base for calling */ + @Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false, exclusiveOf = "", validation = "") + var min_base_quality_score: Option[Int] = config("min_base_quality_score") + + /** Include unmapped reads with chromosomal coordinates */ + @Argument(fullName = "includeUmappedReads", shortName = "unmapped", doc = "Include unmapped reads with chromosomal coordinates", required = false, exclusiveOf = "", validation = "") + var includeUmappedReads: Boolean = config("includeUmappedReads", default = false) + + /** Use additional trigger on variants found in an external alleles file */ + @Argument(fullName = "useAllelesTrigger", shortName = "allelesTrigger", doc = "Use additional trigger on variants found in an external alleles file", required = false, exclusiveOf = "", validation = "") + var useAllelesTrigger: Boolean = config("useAllelesTrigger", default = false) + + /** Disable physical phasing */ + @Argument(fullName = "doNotRunPhysicalPhasing", shortName = "doNotRunPhysicalPhasing", doc = "Disable physical phasing", required = false, exclusiveOf = "", validation = "") + var doNotRunPhysicalPhasing: Boolean = config("doNotRunPhysicalPhasing", default = false) + + /** Only use reads from this read group when making calls (but use all reads to build the assembly) */ + @Argument(fullName = "keepRG", shortName = "keepRG", doc = "Only use reads from this read group when making calls (but use all reads to build the assembly)", required = false, exclusiveOf = "", validation = "") + var keepRG: Option[String] = config("keepRG") + + 
/** Just determine ActiveRegions, don't perform assembly or calling */ + @Argument(fullName = "justDetermineActiveRegions", shortName = "justDetermineActiveRegions", doc = "Just determine ActiveRegions, don't perform assembly or calling", required = false, exclusiveOf = "", validation = "") + var justDetermineActiveRegions: Boolean = config("justDetermineActiveRegions", default = false) + + /** Perform assembly but do not genotype variants */ + @Argument(fullName = "dontGenotype", shortName = "dontGenotype", doc = "Perform assembly but do not genotype variants", required = false, exclusiveOf = "", validation = "") + var dontGenotype: Boolean = config("dontGenotype", default = false) + + /** Do not analyze soft clipped bases in the reads */ + @Argument(fullName = "dontUseSoftClippedBases", shortName = "dontUseSoftClippedBases", doc = "Do not analyze soft clipped bases in the reads", required = false, exclusiveOf = "", validation = "") + var dontUseSoftClippedBases: Boolean = config("dontUseSoftClippedBases", default = false) + + /** Write a BAM called assemblyFailure.bam capturing all of the reads that were in the active region when the assembler failed for any reason */ + @Argument(fullName = "captureAssemblyFailureBAM", shortName = "captureAssemblyFailureBAM", doc = "Write a BAM called assemblyFailure.bam capturing all of the reads that were in the active region when the assembler failed for any reason", required = false, exclusiveOf = "", validation = "") + var captureAssemblyFailureBAM: Boolean = config("captureAssemblyFailureBAM", default = false) + + /** Use an exploratory algorithm to error correct the kmers used during assembly */ + @Argument(fullName = "errorCorrectReads", shortName = "errorCorrectReads", doc = "Use an exploratory algorithm to error correct the kmers used during assembly", required = false, exclusiveOf = "", validation = "") + var errorCorrectReads: Boolean = config("errorCorrectReads", default = false) + + /** The PCR indel model to use */ 
+ @Argument(fullName = "pcr_indel_model", shortName = "pcrModel", doc = "The PCR indel model to use", required = false, exclusiveOf = "", validation = "") + var pcr_indel_model: Option[String] = config("pcr_indel_model") + + /** Maximum reads in an active region */ + @Argument(fullName = "maxReadsInRegionPerSample", shortName = "maxReadsInRegionPerSample", doc = "Maximum reads in an active region", required = false, exclusiveOf = "", validation = "") + var maxReadsInRegionPerSample: Option[Int] = config("maxReadsInRegionPerSample") + + /** Minimum number of reads sharing the same alignment start for each genomic location in an active region */ + @Argument(fullName = "minReadsPerAlignmentStart", shortName = "minReadsPerAlignStart", doc = "Minimum number of reads sharing the same alignment start for each genomic location in an active region", required = false, exclusiveOf = "", validation = "") + var minReadsPerAlignmentStart: Option[Int] = config("minReadsPerAlignmentStart") + + /** Output the raw activity profile results in IGV format */ + @Output(fullName = "activityProfileOut", shortName = "APO", doc = "Output the raw activity profile results in IGV format", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) + var activityProfileOut: File = _ + + /** Output the active region to this IGV formatted file */ + @Output(fullName = "activeRegionOut", shortName = "ARO", doc = "Output the active region to this IGV formatted file", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) + var activeRegionOut: File = _ + + /** Use this interval list file as the active regions to process */ + @Input(fullName = "activeRegionIn", shortName = "AR", doc = "Use this interval list file as the active regions to process", required = false, exclusiveOf = "", validation = "") + var 
activeRegionIn: Seq[File] = Nil + + /** The active region extension; if not provided defaults to Walker annotated default */ + @Argument(fullName = "activeRegionExtension", shortName = "activeRegionExtension", doc = "The active region extension; if not provided defaults to Walker annotated default", required = false, exclusiveOf = "", validation = "") + var activeRegionExtension: Option[Int] = config("activeRegionExtension") + + /** If provided, all bases will be tagged as active */ + @Argument(fullName = "forceActive", shortName = "forceActive", doc = "If provided, all bases will be tagged as active", required = false, exclusiveOf = "", validation = "") + var forceActive: Boolean = config("forceActive", default = false) + + /** The active region maximum size; if not provided defaults to Walker annotated default */ + @Argument(fullName = "activeRegionMaxSize", shortName = "activeRegionMaxSize", doc = "The active region maximum size; if not provided defaults to Walker annotated default", required = false, exclusiveOf = "", validation = "") + var activeRegionMaxSize: Option[Int] = config("activeRegionMaxSize") + + /** The sigma of the band pass filter Gaussian kernel; if not provided defaults to Walker annotated default */ + @Argument(fullName = "bandPassSigma", shortName = "bandPassSigma", doc = "The sigma of the band pass filter Gaussian kernel; if not provided defaults to Walker annotated default", required = false, exclusiveOf = "", validation = "") + var bandPassSigma: Option[Double] = config("bandPassSigma") + + /** Format string for bandPassSigma */ + @Argument(fullName = "bandPassSigmaFormat", shortName = "", doc = "Format string for bandPassSigma", required = false, exclusiveOf = "", validation = "") + var bandPassSigmaFormat: String = "%s" + + /** Region probability propagation distance beyond it's maximum size. 
*/ + @Argument(fullName = "maxProbPropagationDistance", shortName = "maxProbPropDist", doc = "Region probability propagation distance beyond it's maximum size.", required = false, exclusiveOf = "", validation = "") + var maxProbPropagationDistance: Option[Int] = config("maxProbPropagationDistance") + + /** Threshold for the probability of a profile state being active. */ + @Argument(fullName = "activeProbabilityThreshold", shortName = "ActProbThresh", doc = "Threshold for the probability of a profile state being active.", required = false, exclusiveOf = "", validation = "") + var activeProbabilityThreshold: Option[Double] = config("activeProbabilityThreshold") + + /** Format string for activeProbabilityThreshold */ + @Argument(fullName = "activeProbabilityThresholdFormat", shortName = "", doc = "Format string for activeProbabilityThreshold", required = false, exclusiveOf = "", validation = "") + var activeProbabilityThresholdFormat: String = "%s" + + /** Minimum read mapping quality required to consider a read for analysis with the HaplotypeCaller */ + @Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for analysis with the HaplotypeCaller", required = false, exclusiveOf = "", validation = "") + var min_mapping_quality_score: Option[Int] = config("min_mapping_quality_score") + + /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ + @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) + + /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ + @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", 
doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false) + + /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ + @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) + + @Output + @Gather(enabled = false) + private var outputIndex: File = _ + + @Output + @Gather(enabled = false) + private var outputBamIndex: File = _ + + override def beforeGraph() { + super.beforeGraph() + if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out)) + outputIndex = VcfUtils.getVcfIndexFile(out) + dbsnp.foreach(deps :+= VcfUtils.getVcfIndexFile(_)) + deps ++= comp.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => new File(orig + ".idx")) + if (bamOutput != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(bamOutput)) + if (!disable_bam_indexing) + outputBamIndex = new File(bamOutput.getPath.stripSuffix(".bam") + ".bai") + if (bamOutput != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(bamOutput)) + if (generate_md5) + bamOutputMD5 = new File(bamOutput.getPath + ".md5") + alleles.foreach(deps :+= VcfUtils.getVcfIndexFile(_)) + num_cpu_threads_per_data_thread = Some(getThreads) + } + + override def cmdLine = super.cmdLine + + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + + optional("-likelihoodEngine", likelihoodCalculationEngine, spaceSeparated = true, escape = true, format = "%s") + + optional("-hksr", 
heterogeneousKmerSizeResolution, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-D", dbsnp), dbsnp, spaceSeparated = true, escape = true, format = "%s") + + conditional(dontTrimActiveRegions, "-dontTrimActiveRegions", escape = true, format = "%s") + + optional("-maxDiscARExtension", maxDiscARExtension, spaceSeparated = true, escape = true, format = "%s") + + optional("-maxGGAARExtension", maxGGAARExtension, spaceSeparated = true, escape = true, format = "%s") + + optional("-paddingAroundIndels", paddingAroundIndels, spaceSeparated = true, escape = true, format = "%s") + + optional("-paddingAroundSNPs", paddingAroundSNPs, spaceSeparated = true, escape = true, format = "%s") + + repeat("-comp", comp, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + + repeat("-A", annotation, spaceSeparated = true, escape = true, format = "%s") + + repeat("-XA", excludeAnnotation, spaceSeparated = true, escape = true, format = "%s") + + repeat("-G", group, spaceSeparated = true, escape = true, format = "%s") + + conditional(debug, "-debug", escape = true, format = "%s") + + conditional(useFilteredReadsForAnnotations, "-useFilteredReadsForAnnotations", escape = true, format = "%s") + + optional("-ERC", emitRefConfidence, spaceSeparated = true, escape = true, format = "%s") + + optional("-bamout", bamOutput, spaceSeparated = true, escape = true, format = "%s") + + optional("-bamWriterType", bamWriterType, spaceSeparated = true, escape = true, format = "%s") + + conditional(disableOptimizations, "-disableOptimizations", escape = true, format = "%s") + + conditional(annotateNDA, "-nda", escape = true, format = "%s") + + optional("-hets", heterozygosity, spaceSeparated = true, escape = true, format = heterozygosityFormat) + + optional("-indelHeterozygosity", indel_heterozygosity, spaceSeparated = true, escape = true, format = indel_heterozygosityFormat) + + 
optional("-stand_call_conf", standard_min_confidence_threshold_for_calling, spaceSeparated = true, escape = true, format = standard_min_confidence_threshold_for_callingFormat) + + optional("-stand_emit_conf", standard_min_confidence_threshold_for_emitting, spaceSeparated = true, escape = true, format = standard_min_confidence_threshold_for_emittingFormat) + + optional("-maxAltAlleles", max_alternate_alleles, spaceSeparated = true, escape = true, format = "%s") + + repeat("-inputPrior", input_prior, spaceSeparated = true, escape = true, format = "%s") + + optional("-ploidy", sample_ploidy, spaceSeparated = true, escape = true, format = "%s") + + optional("-gt_mode", genotyping_mode, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-alleles", alleles), alleles, spaceSeparated = true, escape = true, format = "%s") + + optional("-contamination", contamination_fraction_to_filter, spaceSeparated = true, escape = true, format = contamination_fraction_to_filterFormat) + + optional("-contaminationFile", contamination_fraction_per_sample_file, spaceSeparated = true, escape = true, format = "%s") + + optional("-pnrm", p_nonref_model, spaceSeparated = true, escape = true, format = "%s") + + optional("-logExactCalls", exactcallslog, spaceSeparated = true, escape = true, format = "%s") + + optional("-out_mode", output_mode, spaceSeparated = true, escape = true, format = "%s") + conditional(allSitePLs, "-allSitePLs", escape = true, format = "%s") + + optional("-gcpHMM", gcpHMM, spaceSeparated = true, escape = true, format = "%s") + + optional("-pairHMM", pair_hmm_implementation, spaceSeparated = true, escape = true, format = "%s") + + optional("-pairHMMSub", pair_hmm_sub_implementation, spaceSeparated = true, escape = true, format = "%s") + + conditional(always_load_vector_logless_PairHMM_lib, "-alwaysloadVectorHMM", escape = true, format = "%s") + + optional("-globalMAPQ", phredScaledGlobalReadMismappingRate, spaceSeparated 
= true, escape = true, format = "%s") + + conditional(noFpga, "-noFpga", escape = true, format = "%s") + optional("-sn", sample_name, spaceSeparated = true, escape = true, format = "%s") + + repeat("-kmerSize", kmerSize, spaceSeparated = true, escape = true, format = "%s") + + conditional(dontIncreaseKmerSizesForCycles, "-dontIncreaseKmerSizesForCycles", escape = true, format = "%s") + + conditional(allowNonUniqueKmersInRef, "-allowNonUniqueKmersInRef", escape = true, format = "%s") + + optional("-numPruningSamples", numPruningSamples, spaceSeparated = true, escape = true, format = "%s") + + conditional(doNotRecoverDanglingBranches, "-doNotRecoverDanglingBranches", escape = true, format = "%s") + + optional("-minDanglingBranchLength", minDanglingBranchLength, spaceSeparated = true, escape = true, format = "%s") + + conditional(consensus, "-consensus", escape = true, format = "%s") + + optional("-maxNumHaplotypesInPopulation", maxNumHaplotypesInPopulation, spaceSeparated = true, escape = true, format = "%s") + + conditional(errorCorrectKmers, "-errorCorrectKmers", escape = true, format = "%s") + + optional("-minPruning", minPruning, spaceSeparated = true, escape = true, format = "%s") + + conditional(debugGraphTransformations, "-debugGraphTransformations", escape = true, format = "%s") + + conditional(allowCyclesInKmerGraphToGeneratePaths, "-allowCyclesInKmerGraphToGeneratePaths", escape = true, format = "%s") + + optional("-graph", graphOutput, spaceSeparated = true, escape = true, format = "%s") + + optional("-kmerLengthForReadErrorCorrection", kmerLengthForReadErrorCorrection, spaceSeparated = true, escape = true, format = "%s") + + optional("-minObservationsForKmerToBeSolid", minObservationsForKmerToBeSolid, spaceSeparated = true, escape = true, format = "%s") + + repeat("-GQB", GVCFGQBands, spaceSeparated = true, escape = true, format = "%s") + + optional("-ERCIS", indelSizeToEliminateInRefModel, spaceSeparated = true, escape = true, format = "%s") + + 
optional("-mbq", min_base_quality_score, spaceSeparated = true, escape = true, format = "%s") + + conditional(includeUmappedReads, "-unmapped", escape = true, format = "%s") + + conditional(useAllelesTrigger, "-allelesTrigger", escape = true, format = "%s") + + conditional(doNotRunPhysicalPhasing, "-doNotRunPhysicalPhasing", escape = true, format = "%s") + + optional("-keepRG", keepRG, spaceSeparated = true, escape = true, format = "%s") + + conditional(justDetermineActiveRegions, "-justDetermineActiveRegions", escape = true, format = "%s") + + conditional(dontGenotype, "-dontGenotype", escape = true, format = "%s") + + conditional(dontUseSoftClippedBases, "-dontUseSoftClippedBases", escape = true, format = "%s") + + conditional(captureAssemblyFailureBAM, "-captureAssemblyFailureBAM", escape = true, format = "%s") + + conditional(errorCorrectReads, "-errorCorrectReads", escape = true, format = "%s") + + optional("-pcrModel", pcr_indel_model, spaceSeparated = true, escape = true, format = "%s") + + optional("-maxReadsInRegionPerSample", maxReadsInRegionPerSample, spaceSeparated = true, escape = true, format = "%s") + + optional("-minReadsPerAlignStart", minReadsPerAlignmentStart, spaceSeparated = true, escape = true, format = "%s") + + optional("-APO", activityProfileOut, spaceSeparated = true, escape = true, format = "%s") + + optional("-ARO", activeRegionOut, spaceSeparated = true, escape = true, format = "%s") + + repeat("-AR", activeRegionIn, spaceSeparated = true, escape = true, format = "%s") + + optional("-activeRegionExtension", activeRegionExtension, spaceSeparated = true, escape = true, format = "%s") + + conditional(forceActive, "-forceActive", escape = true, format = "%s") + + optional("-activeRegionMaxSize", activeRegionMaxSize, spaceSeparated = true, escape = true, format = "%s") + + optional("-bandPassSigma", bandPassSigma, spaceSeparated = true, escape = true, format = bandPassSigmaFormat) + + optional("-maxProbPropDist", maxProbPropagationDistance, 
spaceSeparated = true, escape = true, format = "%s") + + optional("-ActProbThresh", activeProbabilityThreshold, spaceSeparated = true, escape = true, format = activeProbabilityThresholdFormat) + + optional("-mmq", min_mapping_quality_score, spaceSeparated = true, escape = true, format = "%s") + + conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + + conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s") +} + +object HaplotypeCaller { + def apply(root: Configurable, inputFiles: List[File], outputFile: File): HaplotypeCaller = { + val hc = new HaplotypeCaller(root) + hc.input_file = inputFiles + hc.out = outputFile + hc + } + + def gvcf(root: Configurable, inputFile: File, outputFile: File): HaplotypeCaller = { + val hc = apply(root, List(inputFile), outputFile) + hc.emitRefConfidence = "GVCF" + hc.variant_index_type = Some("LINEAR") + hc.variant_index_parameter = Some(hc.config("variant_index_parameter", default = 128000).asInt) + hc + } +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala new file mode 100644 index 0000000000000000000000000000000000000000..7d16d832892a0daaffd999f549f5fd00f327b51c --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala @@ -0,0 +1,179 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import java.io.File + +import nl.lumc.sasc.biopet.core.ScatterGatherableFunction +import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile +import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ } + +class IndelRealigner(val root: Configurable) extends CommandLineGATK with 
ScatterGatherableFunction { + def analysis_type = "IndelRealigner" + scatterClass = classOf[ContigScatterFunction] + setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = true } + + /** Input VCF file(s) with known indels */ + @Input(fullName = "knownAlleles", shortName = "known", doc = "Input VCF file(s) with known indels", required = false, exclusiveOf = "", validation = "") + var knownAlleles: Seq[File] = Nil + + /** Intervals file output from RealignerTargetCreator */ + @Input(fullName = "targetIntervals", shortName = "targetIntervals", doc = "Intervals file output from RealignerTargetCreator", required = true, exclusiveOf = "", validation = "") + var targetIntervals: File = _ + + /** LOD threshold above which the cleaner will clean */ + @Argument(fullName = "LODThresholdForCleaning", shortName = "LOD", doc = "LOD threshold above which the cleaner will clean", required = false, exclusiveOf = "", validation = "") + var LODThresholdForCleaning: Option[Double] = config("LODThresholdForCleaning") + + /** Format string for LODThresholdForCleaning */ + @Argument(fullName = "LODThresholdForCleaningFormat", shortName = "", doc = "Format string for LODThresholdForCleaning", required = false, exclusiveOf = "", validation = "") + var LODThresholdForCleaningFormat: String = "%s" + + /** Output bam */ + @Output(fullName = "out", shortName = "o", doc = "Output bam", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[BamGatherFunction]) + var out: File = _ + + /** Automatically generated md5 for out */ + @Output(fullName = "outMD5", shortName = "", doc = "Automatically generated md5 for out", required = false, exclusiveOf = "", validation = "") + @Gather(enabled = false) + private var outMD5: File = _ + + /** Determines how to compute the possible alternate consenses */ + @Argument(fullName = "consensusDeterminationModel", shortName = "model", doc = "Determines how to compute the possible alternate consenses", required = 
false, exclusiveOf = "", validation = "") + var consensusDeterminationModel: Option[String] = config("consensusDeterminationModel") + + /** Percentage of mismatches at a locus to be considered having high entropy (0.0 < entropy <= 1.0) */ + @Argument(fullName = "entropyThreshold", shortName = "entropy", doc = "Percentage of mismatches at a locus to be considered having high entropy (0.0 < entropy <= 1.0)", required = false, exclusiveOf = "", validation = "") + var entropyThreshold: Option[Double] = config("entropyThreshold") + + /** Format string for entropyThreshold */ + @Argument(fullName = "entropyThresholdFormat", shortName = "", doc = "Format string for entropyThreshold", required = false, exclusiveOf = "", validation = "") + var entropyThresholdFormat: String = "%s" + + /** max reads allowed to be kept in memory at a time by the SAMFileWriter */ + @Argument(fullName = "maxReadsInMemory", shortName = "maxInMemory", doc = "max reads allowed to be kept in memory at a time by the SAMFileWriter", required = false, exclusiveOf = "", validation = "") + var maxReadsInMemory: Option[Int] = config("maxReadsInMemory") + + /** maximum insert size of read pairs that we attempt to realign */ + @Argument(fullName = "maxIsizeForMovement", shortName = "maxIsize", doc = "maximum insert size of read pairs that we attempt to realign", required = false, exclusiveOf = "", validation = "") + var maxIsizeForMovement: Option[Int] = config("maxIsizeForMovement") + + /** Maximum positional move in basepairs that a read can be adjusted during realignment */ + @Argument(fullName = "maxPositionalMoveAllowed", shortName = "maxPosMove", doc = "Maximum positional move in basepairs that a read can be adjusted during realignment", required = false, exclusiveOf = "", validation = "") + var maxPositionalMoveAllowed: Option[Int] = config("maxPositionalMoveAllowed") + + /** Max alternate consensuses to try (necessary to improve performance in deep coverage) */ + @Argument(fullName = 
"maxConsensuses", shortName = "maxConsensuses", doc = "Max alternate consensuses to try (necessary to improve performance in deep coverage)", required = false, exclusiveOf = "", validation = "") + var maxConsensuses: Option[Int] = config("maxConsensuses") + + /** Max reads used for finding the alternate consensuses (necessary to improve performance in deep coverage) */ + @Argument(fullName = "maxReadsForConsensuses", shortName = "greedy", doc = "Max reads used for finding the alternate consensuses (necessary to improve performance in deep coverage)", required = false, exclusiveOf = "", validation = "") + var maxReadsForConsensuses: Option[Int] = config("maxReadsForConsensuses") + + /** Max reads allowed at an interval for realignment */ + @Argument(fullName = "maxReadsForRealignment", shortName = "maxReads", doc = "Max reads allowed at an interval for realignment", required = false, exclusiveOf = "", validation = "") + var maxReadsForRealignment: Option[Int] = config("maxReadsForRealignment") + + /** Don't output the original cigar or alignment start tags for each realigned read in the output bam */ + @Argument(fullName = "noOriginalAlignmentTags", shortName = "noTags", doc = "Don't output the original cigar or alignment start tags for each realigned read in the output bam", required = false, exclusiveOf = "", validation = "") + var noOriginalAlignmentTags: Boolean = config("noOriginalAlignmentTags", default = false) + + /** Generate one output file for each input (-I) bam file (not compatible with -output) */ + @Argument(fullName = "nWayOut", shortName = "nWayOut", doc = "Generate one output file for each input (-I) bam file (not compatible with -output)", required = false, exclusiveOf = "", validation = "") + var nWayOut: Option[String] = config("nWayOut") + + /** Generate md5sums for BAMs */ + @Argument(fullName = "generate_nWayOut_md5s", shortName = "", doc = "Generate md5sums for BAMs", required = false, exclusiveOf = "", validation = "") + var 
generate_nWayOut_md5s: Boolean = config("generate_nWayOut_md5s", default = false) + + /** Do early check of reads against existing consensuses */ + @Argument(fullName = "check_early", shortName = "check_early", doc = "Do early check of reads against existing consensuses", required = false, exclusiveOf = "", validation = "") + var check_early: Boolean = config("check_early", default = false) + + /** Don't output the usual PG tag in the realigned bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests. */ + @Argument(fullName = "noPGTag", shortName = "noPG", doc = "Don't output the usual PG tag in the realigned bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.", required = false, exclusiveOf = "", validation = "") + var noPGTag: Boolean = config("noPGTag", default = false) + + /** Keep older PG tags left in the bam header by previous runs of this tool (by default, all these historical tags will be replaced by the latest tag generated in the current run). 
*/ + @Argument(fullName = "keepPGTags", shortName = "keepPG", doc = "Keep older PG tags left in the bam header by previous runs of this tool (by default, all these historical tags will be replaced by the latest tag generated in the current run).", required = false, exclusiveOf = "", validation = "") + var keepPGTags: Boolean = config("keepPGTags", default = false) + + /** Output file (text) for the indels found; FOR DEBUGGING PURPOSES ONLY */ + @Output(fullName = "indelsFileForDebugging", shortName = "indels", doc = "Output file (text) for the indels found; FOR DEBUGGING PURPOSES ONLY", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) + var indelsFileForDebugging: File = _ + + /** print out statistics (what does or doesn't get cleaned); FOR DEBUGGING PURPOSES ONLY */ + @Output(fullName = "statisticsFileForDebugging", shortName = "stats", doc = "print out statistics (what does or doesn't get cleaned); FOR DEBUGGING PURPOSES ONLY", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) + var statisticsFileForDebugging: File = _ + + /** print out whether mismatching columns do or don't get cleaned out; FOR DEBUGGING PURPOSES ONLY */ + @Output(fullName = "SNPsFileForDebugging", shortName = "snps", doc = "print out whether mismatching columns do or don't get cleaned out; FOR DEBUGGING PURPOSES ONLY", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) + var SNPsFileForDebugging: File = _ + + /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ + @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, 
exclusiveOf = "", validation = "") + var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) + + /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ + @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false) + + /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ + @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) + + @Output + @Gather(enabled = false) + private var outputIndex: File = _ + + override def beforeGraph() { + super.beforeGraph() + deps ++= knownAlleles.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => VcfUtils.getVcfIndexFile(orig)) + if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out)) + if (!disable_bam_indexing) + outputIndex = new File(out.getPath.stripSuffix(".bam") + ".bai") + if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out)) + if (generate_md5) + outMD5 = new File(out.getPath + ".md5") + } + + override def cmdLine = super.cmdLine + + repeat("-known", knownAlleles, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + + required("-targetIntervals", targetIntervals, spaceSeparated = true, escape = true, format = "%s") + + optional("-LOD", 
LODThresholdForCleaning, spaceSeparated = true, escape = true, format = LODThresholdForCleaningFormat) + + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + + optional("-model", consensusDeterminationModel, spaceSeparated = true, escape = true, format = "%s") + + optional("-entropy", entropyThreshold, spaceSeparated = true, escape = true, format = entropyThresholdFormat) + + optional("-maxInMemory", maxReadsInMemory, spaceSeparated = true, escape = true, format = "%s") + + optional("-maxIsize", maxIsizeForMovement, spaceSeparated = true, escape = true, format = "%s") + + optional("-maxPosMove", maxPositionalMoveAllowed, spaceSeparated = true, escape = true, format = "%s") + + optional("-maxConsensuses", maxConsensuses, spaceSeparated = true, escape = true, format = "%s") + + optional("-greedy", maxReadsForConsensuses, spaceSeparated = true, escape = true, format = "%s") + + optional("-maxReads", maxReadsForRealignment, spaceSeparated = true, escape = true, format = "%s") + + conditional(noOriginalAlignmentTags, "-noTags", escape = true, format = "%s") + + optional("-nWayOut", nWayOut, spaceSeparated = true, escape = true, format = "%s") + + conditional(generate_nWayOut_md5s, "--generate_nWayOut_md5s", escape = true, format = "%s") + + conditional(check_early, "-check_early", escape = true, format = "%s") + + conditional(noPGTag, "-noPG", escape = true, format = "%s") + + conditional(keepPGTags, "-keepPG", escape = true, format = "%s") + + optional("-indels", indelsFileForDebugging, spaceSeparated = true, escape = true, format = "%s") + + optional("-stats", statisticsFileForDebugging, spaceSeparated = true, escape = true, format = "%s") + + optional("-snps", SNPsFileForDebugging, spaceSeparated = true, escape = true, format = "%s") + + conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + + 
conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s") +} + +object IndelRealigner { + def apply(root: Configurable, input: File, targetIntervals: File, outputDir: File): IndelRealigner = { + val ir = new IndelRealigner(root) + ir.input_file :+= input + ir.targetIntervals = targetIntervals + ir.out = new File(outputDir, input.getName.stripSuffix(".bam") + ".realign.bam") + ir + } +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/LocusScatterFunction.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/LocusScatterFunction.scala new file mode 100644 index 0000000000000000000000000000000000000000..6a3b961ecbfb034465909714305e469af63a816a --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/LocusScatterFunction.scala @@ -0,0 +1,19 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import org.broadinstitute.gatk.queue.function.InProcessFunction +import org.broadinstitute.gatk.utils.interval.IntervalUtils + +import scala.collection.JavaConversions._ + +/** + * A scatter function that divides down to the locus level. 
+ */ +class LocusScatterFunction extends GATKScatterFunction with InProcessFunction { + protected override def maxIntervals = scatterCount + + def run() { + val gi = GATKScatterFunction.getGATKIntervals(this.originalGATK) + val splits = IntervalUtils.splitLocusIntervals(gi.locs, this.scatterOutputFiles.size) + IntervalUtils.scatterFixedIntervals(gi.samFileHeader, splits, this.scatterOutputFiles) + } +} \ No newline at end of file diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/PrintReads.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/PrintReads.scala new file mode 100644 index 0000000000000000000000000000000000000000..6eaca11891f0d5a559cd3723f5ee8852800f9260 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/PrintReads.scala @@ -0,0 +1,98 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import java.io.File + +import nl.lumc.sasc.biopet.core.ScatterGatherableFunction +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline._ + +class PrintReads(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction { + def analysis_type = "PrintReads" + scatterClass = classOf[ContigScatterFunction] + setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = true } + + /** Write output to this BAM filename instead of STDOUT */ + @Output(fullName = "out", shortName = "o", doc = "Write output to this BAM filename instead of STDOUT", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[BamGatherFunction]) + var out: File = _ + + /** Exclude all reads with this read group from the output */ + @Argument(fullName = "readGroup", shortName = "readGroup", doc = "Exclude all reads with this read group from the output", required = false, exclusiveOf = "", validation = "") + var readGroup: Option[String] = config("readGroup", default = false) + + /** Exclude all reads with 
this platform from the output */ + @Argument(fullName = "platform", shortName = "platform", doc = "Exclude all reads with this platform from the output", required = false, exclusiveOf = "", validation = "") + var platform: Option[String] = config("platform") + + /** Print the first n reads from the file, discarding the rest */ + @Argument(fullName = "number", shortName = "n", doc = "Print the first n reads from the file, discarding the rest", required = false, exclusiveOf = "", validation = "") + var number: Option[Int] = config("number") + + /** File containing a list of samples (one per line). Can be specified multiple times */ + @Argument(fullName = "sample_file", shortName = "sf", doc = "File containing a list of samples (one per line). Can be specified multiple times", required = false, exclusiveOf = "", validation = "") + var sample_file: List[File] = config("sample_file", default = Nil) + + /** Sample name to be included in the analysis. Can be specified multiple times. */ + @Argument(fullName = "sample_name", shortName = "sn", doc = "Sample name to be included in the analysis. 
Can be specified multiple times.", required = false, exclusiveOf = "", validation = "") + var sample_name: List[String] = config("sample_name", default = Nil) + + /** Simplify all reads */ + @Argument(fullName = "simplify", shortName = "s", doc = "Simplify all reads", required = false, exclusiveOf = "", validation = "") + var simplify: Boolean = config("simplify", default = false) + + /** Don't output a program tag */ + @Argument(fullName = "no_pg_tag", shortName = "npt", doc = "Don't output a program tag", required = false, exclusiveOf = "", validation = "") + var no_pg_tag: Boolean = config("no_pg_tag", default = false) + + /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ + @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) + + /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ + @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false) + + /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ + @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. 
'*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) + + @Output + @Gather(enabled = false) + private var outputIndex: File = _ + + @Output + @Gather(enabled = false) + private var outputMd5: File = _ + + override def beforeGraph() { + super.beforeGraph() + if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out)) + if (!disable_bam_indexing) + outputIndex = new File(out.getPath.stripSuffix(".bam") + ".bai") + if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out)) + if (generate_md5) + outputMd5 = new File(out.getPath + ".md5") + } + + override def cmdLine = super.cmdLine + + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + + optional("-readGroup", readGroup, spaceSeparated = true, escape = true, format = "%s") + + optional("-platform", platform, spaceSeparated = true, escape = true, format = "%s") + + optional("-n", number, spaceSeparated = true, escape = true, format = "%s") + + repeat("-sf", sample_file, spaceSeparated = true, escape = true, format = "%s") + + repeat("-sn", sample_name, spaceSeparated = true, escape = true, format = "%s") + + conditional(simplify, "-s", escape = true, format = "%s") + + conditional(no_pg_tag, "-npt", escape = true, format = "%s") + + conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + + conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s") +} + +object PrintReads { + def apply(root: Configurable, input: File, output: File): PrintReads = { + val br = new PrintReads(root) + br.input_file :+= input + br.out = output + br + } +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala 
b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala new file mode 100644 index 0000000000000000000000000000000000000000..74ce632bd78dba9de535c3f3b4f82fb481976410 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala @@ -0,0 +1,83 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import java.io.File + +import nl.lumc.sasc.biopet.core.ScatterGatherableFunction +import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile +import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, _ } + +class RealignerTargetCreator(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction { + def analysis_type = "RealignerTargetCreator" + scatterClass = classOf[LocusScatterFunction] + setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false } + + /** An output file created by the walker. Will overwrite contents if file exists */ + @Output(fullName = "out", shortName = "o", doc = "An output file created by the walker. 
Will overwrite contents if file exists", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) + var out: File = _ + + /** Input VCF file with known indels */ + @Input(fullName = "known", shortName = "known", doc = "Input VCF file with known indels", required = false, exclusiveOf = "", validation = "") + var known: List[File] = config("known", default = Nil) + + /** window size for calculating entropy or SNP clusters */ + @Argument(fullName = "windowSize", shortName = "window", doc = "window size for calculating entropy or SNP clusters", required = false, exclusiveOf = "", validation = "") + var windowSize: Option[Int] = config("windowSize") + + /** fraction of base qualities needing to mismatch for a position to have high entropy */ + @Argument(fullName = "mismatchFraction", shortName = "mismatch", doc = "fraction of base qualities needing to mismatch for a position to have high entropy", required = false, exclusiveOf = "", validation = "") + var mismatchFraction: Option[Double] = config("mismatchFraction") + + /** Format string for mismatchFraction */ + @Argument(fullName = "mismatchFractionFormat", shortName = "", doc = "Format string for mismatchFraction", required = false, exclusiveOf = "", validation = "") + var mismatchFractionFormat: String = "%s" + + /** minimum reads at a locus to enable using the entropy calculation */ + @Argument(fullName = "minReadsAtLocus", shortName = "minReads", doc = "minimum reads at a locus to enable using the entropy calculation", required = false, exclusiveOf = "", validation = "") + var minReadsAtLocus: Option[Int] = config("minReadsAtLocus") + + /** maximum interval size; any intervals larger than this value will be dropped */ + @Argument(fullName = "maxIntervalSize", shortName = "maxInterval", doc = "maximum interval size; any intervals larger than this value will be dropped", required = false, exclusiveOf = "", validation = "") 
+ var maxIntervalSize: Option[Int] = config("maxIntervalSize") + + /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ + @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) + + /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ + @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false) + + /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ + @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. 
'*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) + + if (config.contains("dbsnp")) known :+= new File(config("dbsnp").asString) + + override def beforeGraph() { + super.beforeGraph() + deps ++= known.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => VcfUtils.getVcfIndexFile(orig)) + } + + override def cmdLine = super.cmdLine + + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + + repeat("-known", known, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + + optional("-window", windowSize, spaceSeparated = true, escape = true, format = "%s") + + optional("-mismatch", mismatchFraction, spaceSeparated = true, escape = true, format = mismatchFractionFormat) + + optional("-minReads", minReadsAtLocus, spaceSeparated = true, escape = true, format = "%s") + + optional("-maxInterval", maxIntervalSize, spaceSeparated = true, escape = true, format = "%s") + + conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + + conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s") +} + +object RealignerTargetCreator { + def apply(root: Configurable, input: File, outputDir: File): RealignerTargetCreator = { + val re = new RealignerTargetCreator(root) + re.input_file :+= input + re.out = new File(outputDir, input.getName.stripSuffix(".bam") + ".realign.intervals") + re + } +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala new file mode 100644 index 0000000000000000000000000000000000000000..a1ed7b732f9b72d1660c9ea8c1995e5fc0137a68 --- /dev/null
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala @@ -0,0 +1,262 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import java.io.File + +import nl.lumc.sasc.biopet.core.ScatterGatherableFunction +import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile +import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ } + +class SelectVariants(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction { + def analysis_type = "SelectVariants" + scatterClass = classOf[LocusScatterFunction] + setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false } + + /** Input VCF file */ + @Input(fullName = "variant", shortName = "V", doc = "Input VCF file", required = true, exclusiveOf = "", validation = "") + var variant: File = _ + + /** Output variants not called in this comparison track */ + @Input(fullName = "discordance", shortName = "disc", doc = "Output variants not called in this comparison track", required = false, exclusiveOf = "", validation = "") + var discordance: Option[File] = None + + /** Output variants also called in this comparison track */ + @Input(fullName = "concordance", shortName = "conc", doc = "Output variants also called in this comparison track", required = false, exclusiveOf = "", validation = "") + var concordance: Option[File] = None + + /** File to which variants should be written */ + @Output(fullName = "out", shortName = "o", doc = "File to which variants should be written", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[CatVariantsGatherer]) + var out: File = _ + + /** Include genotypes from this sample */ + @Argument(fullName = "sample_name", shortName = "sn", doc = "Include genotypes from this sample", required = false, exclusiveOf = "", validation = "") + var sample_name: List[String] = 
config("sample_name", default = Nil) + + /** Regular expression to select multiple samples */ + @Argument(fullName = "sample_expressions", shortName = "se", doc = "Regular expression to select multiple samples", required = false, exclusiveOf = "", validation = "") + var sample_expressions: List[String] = config("sample_expressions", default = Nil) + + /** File containing a list of samples to include */ + @Input(fullName = "sample_file", shortName = "sf", doc = "File containing a list of samples to include", required = false, exclusiveOf = "", validation = "") + var sample_file: List[File] = config("sample_file", default = Nil) + + /** Exclude genotypes from this sample */ + @Argument(fullName = "exclude_sample_name", shortName = "xl_sn", doc = "Exclude genotypes from this sample", required = false, exclusiveOf = "", validation = "") + var exclude_sample_name: List[String] = config("exclude_sample_name", default = Nil) + + /** List of samples to exclude */ + @Input(fullName = "exclude_sample_file", shortName = "xl_sf", doc = "List of samples to exclude", required = false, exclusiveOf = "", validation = "") + var exclude_sample_file: List[File] = config("exclude_sample_file", default = Nil) + + /** List of sample expressions to exclude */ + @Input(fullName = "exclude_sample_expressions", shortName = "xl_se", doc = "List of sample expressions to exclude", required = false, exclusiveOf = "", validation = "") + var exclude_sample_expressions: List[File] = config("exclude_sample_expressions", default = Nil) + + /** One or more criteria to use when selecting the data */ + @Argument(fullName = "selectexpressions", shortName = "select", doc = "One or more criteria to use when selecting the data", required = false, exclusiveOf = "", validation = "") + var selectexpressions: List[String] = config("selectexpressions", default = Nil) + + /** Invert the selection criteria for -select */ + @Argument(fullName = "invertselect", shortName = "invertSelect", doc = "Invert the 
selection criteria for -select", required = false, exclusiveOf = "", validation = "") + var invertselect: Boolean = config("invertselect", default = false) + + /** Don't include non-variant sites */ + @Argument(fullName = "excludeNonVariants", shortName = "env", doc = "Don't include non-variant sites", required = false, exclusiveOf = "", validation = "") + var excludeNonVariants: Boolean = config("excludeNonVariants", default = false) + + /** Don't include filtered sites */ + @Argument(fullName = "excludeFiltered", shortName = "ef", doc = "Don't include filtered sites", required = false, exclusiveOf = "", validation = "") + var excludeFiltered: Boolean = config("excludeFiltered", default = false) + + /** Preserve original alleles, do not trim */ + @Argument(fullName = "preserveAlleles", shortName = "noTrim", doc = "Preserve original alleles, do not trim", required = false, exclusiveOf = "", validation = "") + var preserveAlleles: Boolean = config("preserveAlleles", default = false) + + /** Remove alternate alleles not present in any genotypes */ + @Argument(fullName = "removeUnusedAlternates", shortName = "trimAlternates", doc = "Remove alternate alleles not present in any genotypes", required = false, exclusiveOf = "", validation = "") + var removeUnusedAlternates: Boolean = config("removeUnusedAlternates", default = false) + + /** Select only variants of a particular allelicity */ + @Argument(fullName = "restrictAllelesTo", shortName = "restrictAllelesTo", doc = "Select only variants of a particular allelicity", required = false, exclusiveOf = "", validation = "") + var restrictAllelesTo: Option[String] = config("restrictAllelesTo") + + /** Store the original AC, AF, and AN values after subsetting */ + @Argument(fullName = "keepOriginalAC", shortName = "keepOriginalAC", doc = "Store the original AC, AF, and AN values after subsetting", required = false, exclusiveOf = "", validation = "") + var keepOriginalAC: Boolean = config("keepOriginalAC", default = false) + 
+ /** Store the original DP value after subsetting */ + @Argument(fullName = "keepOriginalDP", shortName = "keepOriginalDP", doc = "Store the original DP value after subsetting", required = false, exclusiveOf = "", validation = "") + var keepOriginalDP: Boolean = config("keepOriginalDP", default = false) + + /** Output mendelian violation sites only */ + @Argument(fullName = "mendelianViolation", shortName = "mv", doc = "Output mendelian violation sites only", required = false, exclusiveOf = "", validation = "") + var mendelianViolation: Boolean = config("mendelianViolation", default = false) + + /** Output non-mendelian violation sites only */ + @Argument(fullName = "invertMendelianViolation", shortName = "invMv", doc = "Output non-mendelian violation sites only", required = false, exclusiveOf = "", validation = "") + var invertMendelianViolation: Boolean = config("invertMendelianViolation", default = false) + + /** Minimum GQ score for each trio member to accept a site as a violation */ + @Argument(fullName = "mendelianViolationQualThreshold", shortName = "mvq", doc = "Minimum GQ score for each trio member to accept a site as a violation", required = false, exclusiveOf = "", validation = "") + var mendelianViolationQualThreshold: Option[Double] = config("mendelianViolationQualThreshold") + + /** Format string for mendelianViolationQualThreshold */ + @Argument(fullName = "mendelianViolationQualThresholdFormat", shortName = "", doc = "Format string for mendelianViolationQualThreshold", required = false, exclusiveOf = "", validation = "") + var mendelianViolationQualThresholdFormat: String = "%s" + + /** Select a fraction of variants at random from the input */ + @Argument(fullName = "select_random_fraction", shortName = "fraction", doc = "Select a fraction of variants at random from the input", required = false, exclusiveOf = "", validation = "") + var select_random_fraction: Option[Double] = config("select_random_fraction") + + /** Format string for 
select_random_fraction */ + @Argument(fullName = "select_random_fractionFormat", shortName = "", doc = "Format string for select_random_fraction", required = false, exclusiveOf = "", validation = "") + var select_random_fractionFormat: String = "%s" + + /** Select a fraction of genotypes at random from the input and sets them to no-call */ + @Argument(fullName = "remove_fraction_genotypes", shortName = "fractionGenotypes", doc = "Select a fraction of genotypes at random from the input and sets them to no-call", required = false, exclusiveOf = "", validation = "") + var remove_fraction_genotypes: Option[Double] = config("remove_fraction_genotypes") + + /** Format string for remove_fraction_genotypes */ + @Argument(fullName = "remove_fraction_genotypesFormat", shortName = "", doc = "Format string for remove_fraction_genotypes", required = false, exclusiveOf = "", validation = "") + var remove_fraction_genotypesFormat: String = "%s" + + /** Select only a certain type of variants from the input file */ + @Argument(fullName = "selectTypeToInclude", shortName = "selectType", doc = "Select only a certain type of variants from the input file", required = false, exclusiveOf = "", validation = "") + var selectTypeToInclude: List[String] = config("selectTypeToInclude", default = Nil) + + /** Do not select certain type of variants from the input file */ + @Argument(fullName = "selectTypeToExclude", shortName = "xlSelectType", doc = "Do not select certain type of variants from the input file", required = false, exclusiveOf = "", validation = "") + var selectTypeToExclude: Seq[String] = config("selectTypeToExclude", default = Nil) + + /** List of variant IDs to select */ + @Input(fullName = "keepIDs", shortName = "IDs", doc = "List of variant IDs to select", required = false, exclusiveOf = "", validation = "") + var keepIDs: Option[File] = config("keepIDs") + + /** List of variant IDs to exclude */ + @Argument(fullName = "excludeIDs", shortName = "xlIDs", doc = "List of variant
IDs to exclude", required = false, exclusiveOf = "", validation = "") + var excludeIDs: Option[File] = config("excludeIDs") + + /** If true, the incoming VariantContext will be fully decoded */ + @Argument(fullName = "fullyDecode", shortName = "", doc = "If true, the incoming VariantContext will be fully decoded", required = false, exclusiveOf = "", validation = "") + var fullyDecode: Boolean = config("fullyDecode", default = false) + + /** If true, we won't actually write the output file. For efficiency testing only */ + @Argument(fullName = "justRead", shortName = "", doc = "If true, we won't actually write the output file. For efficiency testing only", required = false, exclusiveOf = "", validation = "") + var justRead: Boolean = config("justRead", default = false) + + /** Maximum size of indels to include */ + @Argument(fullName = "maxIndelSize", shortName = "", doc = "Maximum size of indels to include", required = false, exclusiveOf = "", validation = "") + var maxIndelSize: Option[Int] = config("maxIndelSize") + + /** Minimum size of indels to include */ + @Argument(fullName = "minIndelSize", shortName = "", doc = "Minimum size of indels to include", required = false, exclusiveOf = "", validation = "") + var minIndelSize: Option[Int] = config("minIndelSize") + + /** Maximum number of samples filtered at the genotype level */ + @Argument(fullName = "maxFilteredGenotypes", shortName = "", doc = "Maximum number of samples filtered at the genotype level", required = false, exclusiveOf = "", validation = "") + var maxFilteredGenotypes: Option[Int] = config("maxFilteredGenotypes") + + /** Minimum number of samples filtered at the genotype level */ + @Argument(fullName = "minFilteredGenotypes", shortName = "", doc = "Minimum number of samples filtered at the genotype level", required = false, exclusiveOf = "", validation = "") + var minFilteredGenotypes: Option[Int] = config("minFilteredGenotypes") + + /** Maximum fraction of samples filtered at the genotype level */
+ @Argument(fullName = "maxFractionFilteredGenotypes", shortName = "", doc = "Maximum fraction of samples filtered at the genotype level", required = false, exclusiveOf = "", validation = "") + var maxFractionFilteredGenotypes: Option[Double] = config("maxFractionFilteredGenotypes") + + /** Format string for maxFractionFilteredGenotypes */ + @Argument(fullName = "maxFractionFilteredGenotypesFormat", shortName = "", doc = "Format string for maxFractionFilteredGenotypes", required = false, exclusiveOf = "", validation = "") + var maxFractionFilteredGenotypesFormat: String = "%s" + + /** Minimum fraction of samples filtered at the genotype level */ + @Argument(fullName = "minFractionFilteredGenotypes", shortName = "", doc = "Minimum fraction of samples filtered at the genotype level", required = false, exclusiveOf = "", validation = "") + var minFractionFilteredGenotypes: Option[Double] = config("minFractionFilteredGenotypes") + + /** Format string for minFractionFilteredGenotypes */ + @Argument(fullName = "minFractionFilteredGenotypesFormat", shortName = "", doc = "Format string for minFractionFilteredGenotypes", required = false, exclusiveOf = "", validation = "") + var minFractionFilteredGenotypesFormat: String = "%s" + + /** Set filtered genotypes to no-call */ + @Argument(fullName = "setFilteredGtToNocall", shortName = "", doc = "Set filtered genotypes to no-call", required = false, exclusiveOf = "", validation = "") + var setFilteredGtToNocall: Boolean = config("setFilteredGtToNocall", default = false) + + /** Allow samples other than those in the VCF to be specified on the command line. These samples will be ignored. */ + @Argument(fullName = "ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES", shortName = "", doc = "Allow samples other than those in the VCF to be specified on the command line.
These samples will be ignored.", required = false, exclusiveOf = "", validation = "") + var ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES: Boolean = config("ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES", default = false) + + /** Forces output VCF to be compliant to up-to-date version */ + @Argument(fullName = "forceValidOutput", shortName = "", doc = "Forces output VCF to be compliant to up-to-date version", required = false, exclusiveOf = "", validation = "") + var forceValidOutput: Boolean = config("forceValidOutput", default = false) + + /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ + @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) + + /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ + @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false) + + /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ + @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. 
'*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) + + @Output + @Gather(enabled = false) + private var outputIndex: File = _ + + override def beforeGraph() { + super.beforeGraph() + if (variant != null) + deps :+= VcfUtils.getVcfIndexFile(variant) + discordance.foreach(deps :+= VcfUtils.getVcfIndexFile(_)) + concordance.foreach(deps :+= VcfUtils.getVcfIndexFile(_)) + if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out)) + outputIndex = VcfUtils.getVcfIndexFile(out) + } + + override def cmdLine = super.cmdLine + + required(TaggedFile.formatCommandLineParameter("-V", variant), variant, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-disc", discordance), discordance, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-conc", concordance), concordance, spaceSeparated = true, escape = true, format = "%s") + + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + + repeat("-sn", sample_name, spaceSeparated = true, escape = true, format = "%s") + + repeat("-se", sample_expressions, spaceSeparated = true, escape = true, format = "%s") + + repeat("-sf", sample_file, spaceSeparated = true, escape = true, format = "%s") + + repeat("-xl_sn", exclude_sample_name, spaceSeparated = true, escape = true, format = "%s") + + repeat("-xl_sf", exclude_sample_file, spaceSeparated = true, escape = true, format = "%s") + + repeat("-xl_se", exclude_sample_expressions, spaceSeparated = true, escape = true, format = "%s") + + repeat("-select", selectexpressions, spaceSeparated = true, escape = true, format = "%s") + + conditional(invertselect, "-invertSelect", escape = true, format = "%s") + + conditional(excludeNonVariants, "-env", escape = true, format = "%s") + + 
conditional(excludeFiltered, "-ef", escape = true, format = "%s") + + conditional(preserveAlleles, "-noTrim", escape = true, format = "%s") + + conditional(removeUnusedAlternates, "-trimAlternates", escape = true, format = "%s") + + optional("-restrictAllelesTo", restrictAllelesTo, spaceSeparated = true, escape = true, format = "%s") + + conditional(keepOriginalAC, "-keepOriginalAC", escape = true, format = "%s") + + conditional(keepOriginalDP, "-keepOriginalDP", escape = true, format = "%s") + + conditional(mendelianViolation, "-mv", escape = true, format = "%s") + + conditional(invertMendelianViolation, "-invMv", escape = true, format = "%s") + + optional("-mvq", mendelianViolationQualThreshold, spaceSeparated = true, escape = true, format = mendelianViolationQualThresholdFormat) + + optional("-fraction", select_random_fraction, spaceSeparated = true, escape = true, format = select_random_fractionFormat) + + optional("-fractionGenotypes", remove_fraction_genotypes, spaceSeparated = true, escape = true, format = remove_fraction_genotypesFormat) + + repeat("-selectType", selectTypeToInclude, spaceSeparated = true, escape = true, format = "%s") + + repeat("-xlSelectType", selectTypeToExclude, spaceSeparated = true, escape = true, format = "%s") + + optional("-IDs", keepIDs, spaceSeparated = true, escape = true, format = "%s") + + optional("-xlIDs", excludeIDs, spaceSeparated = true, escape = true, format = "%s") + + conditional(fullyDecode, "--fullyDecode", escape = true, format = "%s") + + conditional(justRead, "--justRead", escape = true, format = "%s") + + optional("--maxIndelSize", maxIndelSize, spaceSeparated = true, escape = true, format = "%s") + + optional("--minIndelSize", minIndelSize, spaceSeparated = true, escape = true, format = "%s") + + optional("--maxFilteredGenotypes", maxFilteredGenotypes, spaceSeparated = true, escape = true, format = "%s") + + optional("--minFilteredGenotypes", minFilteredGenotypes, spaceSeparated = true, escape = true, format = 
"%s") + + optional("--maxFractionFilteredGenotypes", maxFractionFilteredGenotypes, spaceSeparated = true, escape = true, format = maxFractionFilteredGenotypesFormat) + + optional("--minFractionFilteredGenotypes", minFractionFilteredGenotypes, spaceSeparated = true, escape = true, format = minFractionFilteredGenotypesFormat) + + conditional(setFilteredGtToNocall, "--setFilteredGtToNocall", escape = true, format = "%s") + + conditional(ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES, "--ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES", escape = true, format = "%s") + + conditional(forceValidOutput, "--forceValidOutput", escape = true, format = "%s") + + conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + + conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s") +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala new file mode 100644 index 0000000000000000000000000000000000000000..0edfe5260fe2fb9101fd92d01fbcebce94ba0441 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala @@ -0,0 +1,322 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import java.io.File + +import nl.lumc.sasc.biopet.core.ScatterGatherableFunction +import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile +import org.broadinstitute.gatk.utils.commandline.{ Gather, Input, Output, _ } + +class UnifiedGenotyper(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction { + def analysis_type = "UnifiedGenotyper" + scatterClass = classOf[LocusScatterFunction] + setupScatterFunction = { case scatter: GATKScatterFunction => 
scatter.includeUnmapped = false } + + /** Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together */ + @Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false, exclusiveOf = "", validation = "") + var genotype_likelihoods_model: Option[String] = config("genotype_likelihoods_model") + + /** The PCR error rate to be used for computing fragment-based likelihoods */ + @Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false, exclusiveOf = "", validation = "") + var pcr_error_rate: Option[Double] = config("pcr_error_rate") + + /** Format string for pcr_error_rate */ + @Argument(fullName = "pcr_error_rateFormat", shortName = "", doc = "Format string for pcr_error_rate", required = false, exclusiveOf = "", validation = "") + var pcr_error_rateFormat: String = "%s" + + /** If provided, we will calculate the SLOD (SB annotation) */ + @Argument(fullName = "computeSLOD", shortName = "slod", doc = "If provided, we will calculate the SLOD (SB annotation)", required = false, exclusiveOf = "", validation = "") + var computeSLOD: Boolean = config("computeSLOD", default = false) + + /** The PairHMM implementation to use for -glm INDEL genotype likelihood calculations */ + @Argument(fullName = "pair_hmm_implementation", shortName = "pairHMM", doc = "The PairHMM implementation to use for -glm INDEL genotype likelihood calculations", required = false, exclusiveOf = "", validation = "") + var pair_hmm_implementation: Option[String] = config("pair_hmm_implementation") + + /** Minimum base quality required to consider a base for calling */ + 
@Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false, exclusiveOf = "", validation = "") + var min_base_quality_score: Option[Int] = config("min_base_quality_score") + + /** Maximum fraction of reads with deletions spanning this locus for it to be callable */ + @Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable", required = false, exclusiveOf = "", validation = "") + var max_deletion_fraction: Option[Double] = config("max_deletion_fraction") + + /** Format string for max_deletion_fraction */ + @Argument(fullName = "max_deletion_fractionFormat", shortName = "", doc = "Format string for max_deletion_fraction", required = false, exclusiveOf = "", validation = "") + var max_deletion_fractionFormat: String = "%s" + + /** Minimum number of consensus indels required to trigger genotyping run */ + @Argument(fullName = "min_indel_count_for_genotyping", shortName = "minIndelCnt", doc = "Minimum number of consensus indels required to trigger genotyping run", required = false, exclusiveOf = "", validation = "") + var min_indel_count_for_genotyping: Option[Int] = config("min_indel_count_for_genotyping") + + /** Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles */ + @Argument(fullName = "min_indel_fraction_per_sample", shortName = "minIndelFrac", doc = "Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles", required = false, exclusiveOf = "", validation = "") + var min_indel_fraction_per_sample: Option[Double] = config("min_indel_fraction_per_sample") + + /** Format string for min_indel_fraction_per_sample */ + @Argument(fullName = "min_indel_fraction_per_sampleFormat", shortName = "", doc 
= "Format string for min_indel_fraction_per_sample", required = false, exclusiveOf = "", validation = "") + var min_indel_fraction_per_sampleFormat: String = "%s" + + /** Indel gap continuation penalty, as Phred-scaled probability. I.e., 30 => 10^(-30/10) */ + @Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10", required = false, exclusiveOf = "", validation = "") + var indelGapContinuationPenalty: Option[String] = config("indelGapContinuationPenalty") + + /** Indel gap open penalty, as Phred-scaled probability. I.e., 30 => 10^(-30/10) */ + @Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10", required = false, exclusiveOf = "", validation = "") + var indelGapOpenPenalty: Option[String] = config("indelGapOpenPenalty") + + /** Indel haplotype size */ + @Argument(fullName = "indelHaplotypeSize", shortName = "indelHSize", doc = "Indel haplotype size", required = false, exclusiveOf = "", validation = "") + var indelHaplotypeSize: Option[Int] = config("indelHaplotypeSize") + + /** Output indel debug info */ + @Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false, exclusiveOf = "", validation = "") + var indelDebug: Boolean = config("indelDebug", default = false) + + /** expt */ + @Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false, exclusiveOf = "", validation = "") + var ignoreSNPAlleles: Boolean = config("ignoreSNPAlleles", default = false) + + /** expt */ + @Argument(fullName = "allReadsSP", shortName = "dl", doc = "expt", required = false, exclusiveOf = "", validation = "") + var allReadsSP: Boolean = config("allReadsSP", default = false) + + /** Ignore lane when building error model, error model is then per-site */ + @Argument(fullName = 
"ignoreLaneInfo", shortName = "ignoreLane", doc = "Ignore lane when building error model, error model is then per-site", required = false, exclusiveOf = "", validation = "") + var ignoreLaneInfo: Boolean = config("ignoreLaneInfo", default = false) + + /** VCF file with the truth callset for the reference sample */ + @Input(fullName = "reference_sample_calls", shortName = "referenceCalls", doc = "VCF file with the truth callset for the reference sample", required = false, exclusiveOf = "", validation = "") + var reference_sample_calls: Option[File] = config("reference_sample_calls") + + /** Reference sample name. */ + @Argument(fullName = "reference_sample_name", shortName = "refsample", doc = "Reference sample name.", required = false, exclusiveOf = "", validation = "") + var reference_sample_name: Option[String] = config("reference_sample_name") + + /** Min quality score to consider. Smaller numbers process faster. Default: Q1. */ + @Argument(fullName = "min_quality_score", shortName = "minqs", doc = "Min quality score to consider. Smaller numbers process faster. Default: Q1.", required = false, exclusiveOf = "", validation = "") + var min_quality_score: Option[String] = config("min_quality_score") + + /** Max quality score to consider. Smaller numbers process faster. Default: Q40. */ + @Argument(fullName = "max_quality_score", shortName = "maxqs", doc = "Max quality score to consider. Smaller numbers process faster. Default: Q40.", required = false, exclusiveOf = "", validation = "") + var max_quality_score: Option[String] = config("max_quality_score") + + /** Phred-Scaled prior quality of the site. Default: Q20. */ + @Argument(fullName = "site_quality_prior", shortName = "site_prior", doc = "Phred-Scaled prior quality of the site. Default: Q20.", required = false, exclusiveOf = "", validation = "") + var site_quality_prior: Option[String] = config("site_quality_prior") + + /** The minimum confidence in the error model to make a call. 
Number should be between 0 (no power requirement) and 1 (maximum power required). */ + @Argument(fullName = "min_power_threshold_for_calling", shortName = "min_call_power", doc = "The minimum confidence in the error model to make a call. Number should be between 0 (no power requirement) and 1 (maximum power required).", required = false, exclusiveOf = "", validation = "") + var min_power_threshold_for_calling: Option[Double] = config("min_power_threshold_for_calling") + + /** Format string for min_power_threshold_for_calling */ + @Argument(fullName = "min_power_threshold_for_callingFormat", shortName = "", doc = "Format string for min_power_threshold_for_calling", required = false, exclusiveOf = "", validation = "") + var min_power_threshold_for_callingFormat: String = "%s" + + /** If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site */ + @Argument(fullName = "annotateNDA", shortName = "nda", doc = "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site", required = false, exclusiveOf = "", validation = "") + var annotateNDA: Boolean = config("annotateNDA", default = false) + + /** Heterozygosity value used to compute prior likelihoods for any locus */ + @Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false, exclusiveOf = "", validation = "") + var heterozygosity: Option[Double] = config("heterozygosity") + + /** Format string for heterozygosity */ + @Argument(fullName = "heterozygosityFormat", shortName = "", doc = "Format string for heterozygosity", required = false, exclusiveOf = "", validation = "") + var heterozygosityFormat: String = "%s" + + /** Heterozygosity for indel calling */ + @Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity 
for indel calling", required = false, exclusiveOf = "", validation = "") + var indel_heterozygosity: Option[Double] = config("indel_heterozygosity") + + /** Format string for indel_heterozygosity */ + @Argument(fullName = "indel_heterozygosityFormat", shortName = "", doc = "Format string for indel_heterozygosity", required = false, exclusiveOf = "", validation = "") + var indel_heterozygosityFormat: String = "%s" + + /** The minimum phred-scaled confidence threshold at which variants should be called */ + @Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be called", required = false, exclusiveOf = "", validation = "") + var standard_min_confidence_threshold_for_calling: Option[Double] = config("stand_call_conf") + + /** Format string for standard_min_confidence_threshold_for_calling */ + @Argument(fullName = "standard_min_confidence_threshold_for_callingFormat", shortName = "", doc = "Format string for standard_min_confidence_threshold_for_calling", required = false, exclusiveOf = "", validation = "") + var standard_min_confidence_threshold_for_callingFormat: String = "%s" + + /** The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold) */ + @Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)", required = false, exclusiveOf = "", validation = "") + var standard_min_confidence_threshold_for_emitting: Option[Double] = config("stand_emit_conf") + + /** Format string for standard_min_confidence_threshold_for_emitting */ + @Argument(fullName = "standard_min_confidence_threshold_for_emittingFormat", shortName = "", doc = "Format string for 
standard_min_confidence_threshold_for_emitting", required = false, exclusiveOf = "", validation = "") + var standard_min_confidence_threshold_for_emittingFormat: String = "%s" + + /** Maximum number of alternate alleles to genotype */ + @Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false, exclusiveOf = "", validation = "") + var max_alternate_alleles: Option[Int] = config("max_alternate_alleles") + + /** Input prior for calls */ + @Argument(fullName = "input_prior", shortName = "inputPrior", doc = "Input prior for calls", required = false, exclusiveOf = "", validation = "") + var input_prior: List[Double] = config("input_prior", default = Nil) + + /** Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy). */ + @Argument(fullName = "sample_ploidy", shortName = "ploidy", doc = "Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).", required = false, exclusiveOf = "", validation = "") + var sample_ploidy: Option[Int] = config("sample_ploidy") + + /** Specifies how to determine the alternate alleles to use for genotyping */ + @Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false, exclusiveOf = "", validation = "") + var genotyping_mode: Option[String] = config("genotyping_mode") + + /** The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES */ + @Input(fullName = "alleles", shortName = "alleles", doc = "The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES", required = false, exclusiveOf = "", validation = "") + var alleles: Option[File] = config("alleles") + + /** Fraction of contamination in sequencing data (for all samples) to aggressively remove */ + @Argument(fullName = 
"contamination_fraction_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false, exclusiveOf = "", validation = "") + var contamination_fraction_to_filter: Option[Double] = config("contamination_fraction_to_filter") + + /** Format string for contamination_fraction_to_filter */ + @Argument(fullName = "contamination_fraction_to_filterFormat", shortName = "", doc = "Format string for contamination_fraction_to_filter", required = false, exclusiveOf = "", validation = "") + var contamination_fraction_to_filterFormat: String = "%s" + + /** Tab-separated File containing fraction of contamination in sequencing data (per sample) to aggressively remove. Format should be \"<SampleID><TAB><Contamination>\" (Contamination is double) per line; No header. */ + @Argument(fullName = "contamination_fraction_per_sample_file", shortName = "contaminationFile", doc = "Tab-separated File containing fraction of contamination in sequencing data (per sample) to aggressively remove. 
Format should be \"<SampleID><TAB><Contamination>\" (Contamination is double) per line; No header.", required = false, exclusiveOf = "", validation = "") + var contamination_fraction_per_sample_file: Option[File] = config("contamination_fraction_per_sample_file") + + /** Non-reference probability calculation model to employ */ + @Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false, exclusiveOf = "", validation = "") + var p_nonref_model: Option[String] = config("p_nonref_model") + + /** x */ + @Argument(fullName = "exactcallslog", shortName = "logExactCalls", doc = "x", required = false, exclusiveOf = "", validation = "") + var exactcallslog: Option[File] = config("exactcallslog") + + /** Specifies which type of calls we should output */ + @Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false, exclusiveOf = "", validation = "") + var output_mode: Option[String] = config("output_mode") + + /** Annotate all sites with PLs */ + @Argument(fullName = "allSitePLs", shortName = "allSitePLs", doc = "Annotate all sites with PLs", required = false, exclusiveOf = "", validation = "") + var allSitePLs: Boolean = config("allSitePLs", default = false) + + /** dbSNP file */ + @Input(fullName = "dbsnp", shortName = "D", doc = "dbSNP file", required = false, exclusiveOf = "", validation = "") + var dbsnp: Option[File] = config("dbsnp") + + /** Comparison VCF file */ + @Input(fullName = "comp", shortName = "comp", doc = "Comparison VCF file", required = false, exclusiveOf = "", validation = "") + var comp: List[File] = config("comp", default = Nil) + + /** File to which variants should be written */ + @Output(fullName = "out", shortName = "o", doc = "File to which variants should be written", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[CatVariantsGatherer]) + var out: File = _ + + /** If 
provided, only these samples will be emitted into the VCF, regardless of which samples are present in the BAM file */ + @Argument(fullName = "onlyEmitSamples", shortName = "onlyEmitSamples", doc = "If provided, only these samples will be emitted into the VCF, regardless of which samples are present in the BAM file", required = false, exclusiveOf = "", validation = "") + var onlyEmitSamples: List[String] = config("onlyEmitSamples", default = Nil) + + /** File to print all of the annotated and detailed debugging output */ + @Argument(fullName = "debug_file", shortName = "debug_file", doc = "File to print all of the annotated and detailed debugging output", required = false, exclusiveOf = "", validation = "") + var debug_file: File = _ + + /** File to print any relevant callability metrics output */ + @Argument(fullName = "metrics_file", shortName = "metrics", doc = "File to print any relevant callability metrics output", required = false, exclusiveOf = "", validation = "") + var metrics_file: File = _ + + /** One or more specific annotations to apply to variant calls */ + @Argument(fullName = "annotation", shortName = "A", doc = "One or more specific annotations to apply to variant calls", required = false, exclusiveOf = "", validation = "") + var annotation: List[String] = config("annotation", default = Nil, freeVar = false) + + /** One or more specific annotations to exclude */ + @Argument(fullName = "excludeAnnotation", shortName = "XA", doc = "One or more specific annotations to exclude", required = false, exclusiveOf = "", validation = "") + var excludeAnnotation: List[String] = config("excludeAnnotation", default = Nil) + + /** One or more classes/groups of annotations to apply to variant calls. The single value 'none' removes the default group */ + @Argument(fullName = "group", shortName = "G", doc = "One or more classes/groups of annotations to apply to variant calls. 
The single value 'none' removes the default group", required = false, exclusiveOf = "", validation = "") + var group: List[String] = config("group", default = Nil) + + /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ + @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) + + /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ + @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false) + + /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ + @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. 
'*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) + + @Output + @Gather(enabled = false) + private var outputIndex: File = _ + + /** Runs the parent hook, then appends to `deps` the VCF index file of reference_sample_calls, alleles and dbsnp (when set) and of every non-null `comp` entry whose name does not end in ".list"; finally sets `outputIndex` to the index of `out` unless `out` is null or IOUtils.isSpecialFile(out). */ override def beforeGraph(): Unit = { + super.beforeGraph() + reference_sample_calls.foreach(deps :+= VcfUtils.getVcfIndexFile(_)) + alleles.foreach(deps :+= VcfUtils.getVcfIndexFile(_)) + dbsnp.foreach(deps :+= VcfUtils.getVcfIndexFile(_)) + deps ++= comp.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => VcfUtils.getVcfIndexFile(orig)) + if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out)) + outputIndex = VcfUtils.getVcfIndexFile(out) + } + + override def cmdLine = super.cmdLine + + optional("-glm", genotype_likelihoods_model, spaceSeparated = true, escape = true, format = "%s") + + optional("-pcr_error", pcr_error_rate, spaceSeparated = true, escape = true, format = pcr_error_rateFormat) + + conditional(computeSLOD, "-slod", escape = true, format = "%s") + + optional("-pairHMM", pair_hmm_implementation, spaceSeparated = true, escape = true, format = "%s") + + optional("-mbq", min_base_quality_score, spaceSeparated = true, escape = true, format = "%s") + + optional("-deletions", max_deletion_fraction, spaceSeparated = true, escape = true, format = max_deletion_fractionFormat) + + optional("-minIndelCnt", min_indel_count_for_genotyping, spaceSeparated = true, escape = true, format = "%s") + + optional("-minIndelFrac", min_indel_fraction_per_sample, spaceSeparated = true, escape = true, format = min_indel_fraction_per_sampleFormat) + + optional("-indelGCP", indelGapContinuationPenalty, spaceSeparated = true, escape = true, format = "%s") + + optional("-indelGOP", indelGapOpenPenalty, spaceSeparated = true, escape = true, format = "%s") + + optional("-indelHSize", indelHaplotypeSize, spaceSeparated = true, escape = true, format = 
"%s") + + conditional(indelDebug, "-indelDebug", escape = true, format = "%s") + + conditional(ignoreSNPAlleles, "-ignoreSNPAlleles", escape = true, format = "%s") + + conditional(allReadsSP, "-dl", escape = true, format = "%s") + + conditional(ignoreLaneInfo, "-ignoreLane", escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-referenceCalls", reference_sample_calls), reference_sample_calls, spaceSeparated = true, escape = true, format = "%s") + + optional("-refsample", reference_sample_name, spaceSeparated = true, escape = true, format = "%s") + + optional("-minqs", min_quality_score, spaceSeparated = true, escape = true, format = "%s") + + optional("-maxqs", max_quality_score, spaceSeparated = true, escape = true, format = "%s") + + optional("-site_prior", site_quality_prior, spaceSeparated = true, escape = true, format = "%s") + + optional("-min_call_power", min_power_threshold_for_calling, spaceSeparated = true, escape = true, format = min_power_threshold_for_callingFormat) + + conditional(annotateNDA, "-nda", escape = true, format = "%s") + + optional("-hets", heterozygosity, spaceSeparated = true, escape = true, format = heterozygosityFormat) + + optional("-indelHeterozygosity", indel_heterozygosity, spaceSeparated = true, escape = true, format = indel_heterozygosityFormat) + + optional("-stand_call_conf", standard_min_confidence_threshold_for_calling, spaceSeparated = true, escape = true, format = standard_min_confidence_threshold_for_callingFormat) + + optional("-stand_emit_conf", standard_min_confidence_threshold_for_emitting, spaceSeparated = true, escape = true, format = standard_min_confidence_threshold_for_emittingFormat) + + optional("-maxAltAlleles", max_alternate_alleles, spaceSeparated = true, escape = true, format = "%s") + + repeat("-inputPrior", input_prior, spaceSeparated = true, escape = true, format = "%s") + + optional("-ploidy", sample_ploidy, spaceSeparated = true, escape = true, format = "%s") + + 
optional("-gt_mode", genotyping_mode, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-alleles", alleles), alleles, spaceSeparated = true, escape = true, format = "%s") + + optional("-contamination", contamination_fraction_to_filter, spaceSeparated = true, escape = true, format = contamination_fraction_to_filterFormat) + + optional("-contaminationFile", contamination_fraction_per_sample_file, spaceSeparated = true, escape = true, format = "%s") + + optional("-pnrm", p_nonref_model, spaceSeparated = true, escape = true, format = "%s") + + optional("-logExactCalls", exactcallslog, spaceSeparated = true, escape = true, format = "%s") + + optional("-out_mode", output_mode, spaceSeparated = true, escape = true, format = "%s") + + conditional(allSitePLs, "-allSitePLs", escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-D", dbsnp), dbsnp, spaceSeparated = true, escape = true, format = "%s") + + repeat("-comp", comp, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + + repeat("-onlyEmitSamples", onlyEmitSamples, spaceSeparated = true, escape = true, format = "%s") + + optional("-debug_file", debug_file, spaceSeparated = true, escape = true, format = "%s") + + optional("-metrics", metrics_file, spaceSeparated = true, escape = true, format = "%s") + + repeat("-A", annotation, spaceSeparated = true, escape = true, format = "%s") + + repeat("-XA", excludeAnnotation, spaceSeparated = true, escape = true, format = "%s") + + repeat("-G", group, spaceSeparated = true, escape = true, format = "%s") + + conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + + conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = 
"%s") +} + +object UnifiedGenotyper { + def apply(root: Configurable, inputFiles: List[File], outputFile: File): UnifiedGenotyper = { + val ug = new UnifiedGenotyper(root) + ug.input_file = inputFiles + ug.out = outputFile + ug + } +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala new file mode 100644 index 0000000000000000000000000000000000000000..d98a55a49eb6e34c00588fddde66dc00cddfe610 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala @@ -0,0 +1,123 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import java.io.File + +import nl.lumc.sasc.biopet.core.ScatterGatherableFunction +import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile +import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ } + +class VariantAnnotator(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction { + def analysis_type = "VariantAnnotator" + scatterClass = classOf[LocusScatterFunction] + setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false } + + /** Input VCF file */ + @Input(fullName = "variant", shortName = "V", doc = "Input VCF file", required = true, exclusiveOf = "", validation = "") + var variant: File = _ + + /** SnpEff file from which to get annotations */ + @Input(fullName = "snpEffFile", shortName = "snpEffFile", doc = "SnpEff file from which to get annotations", required = false, exclusiveOf = "", validation = "") + var snpEffFile: Option[File] = config("snpEffFile") + + /** dbSNP file */ + @Input(fullName = "dbsnp", shortName = "D", doc = "dbSNP file", required = false, exclusiveOf = "", validation = "") + var dbsnp: Option[File] = config("dbsnp") + + /** Comparison VCF file 
*/ + @Input(fullName = "comp", shortName = "comp", doc = "Comparison VCF file", required = false, exclusiveOf = "", validation = "") + var comp: List[File] = config("comp", default = Nil) + + /** External resource VCF file(s); optional, defaults to an empty list when not configured */ + @Input(fullName = "resource", shortName = "resource", doc = "External resource VCF file", required = false, exclusiveOf = "", validation = "") + var resource: List[File] = config("resource", default = Nil) + + /** File to which variants should be written */ + @Output(fullName = "out", shortName = "o", doc = "File to which variants should be written", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[CatVariantsGatherer]) + var out: File = _ + + /** One or more specific annotations to apply to variant calls */ + @Argument(fullName = "annotation", shortName = "A", doc = "One or more specific annotations to apply to variant calls", required = false, exclusiveOf = "", validation = "") + var annotation: List[String] = config("annotation", default = Nil, freeVar = false) + + /** One or more specific annotations to exclude */ + @Argument(fullName = "excludeAnnotation", shortName = "XA", doc = "One or more specific annotations to exclude", required = false, exclusiveOf = "", validation = "") + var excludeAnnotation: List[String] = config("excludeAnnotation", default = Nil) + + /** One or more classes/groups of annotations to apply to variant calls */ + @Argument(fullName = "group", shortName = "G", doc = "One or more classes/groups of annotations to apply to variant calls", required = false, exclusiveOf = "", validation = "") + var group: List[String] = config("group", default = Nil) + + /** One or more specific expressions to apply to variant calls */ + @Argument(fullName = "expression", shortName = "E", doc = "One or more specific expressions to apply to variant calls", required = false, exclusiveOf = "", validation = "") + var expression: List[String] = config("expression", default = Nil) + + /** Check for allele concordances when using an 
external resource VCF file */ + @Argument(fullName = "resourceAlleleConcordance", shortName = "rac", doc = "Check for allele concordances when using an external resource VCF file", required = false, exclusiveOf = "", validation = "") + var resourceAlleleConcordance: Boolean = config("resourceAlleleConcordance", default = false) + + /** Use all possible annotations (not for the faint of heart) */ + @Argument(fullName = "useAllAnnotations", shortName = "all", doc = "Use all possible annotations (not for the faint of heart)", required = false, exclusiveOf = "", validation = "") + var useAllAnnotations: Boolean = config("useAllAnnotations", default = false) + + /** Add dbSNP ID even if one is already present */ + @Argument(fullName = "alwaysAppendDbsnpId", shortName = "alwaysAppendDbsnpId", doc = "Add dbSNP ID even if one is already present", required = false, exclusiveOf = "", validation = "") + var alwaysAppendDbsnpId: Boolean = config("alwaysAppendDbsnpId", default = false) + + /** GQ threshold for annotating MV ratio */ + @Argument(fullName = "MendelViolationGenotypeQualityThreshold", shortName = "mvq", doc = "GQ threshold for annotating MV ratio", required = false, exclusiveOf = "", validation = "") + var MendelViolationGenotypeQualityThreshold: Option[Double] = config("MendelViolationGenotypeQualityThreshold") + + /** Format string for MendelViolationGenotypeQualityThreshold */ + @Argument(fullName = "MendelViolationGenotypeQualityThresholdFormat", shortName = "", doc = "Format string for MendelViolationGenotypeQualityThreshold", required = false, exclusiveOf = "", validation = "") + var MendelViolationGenotypeQualityThresholdFormat: String = "%s" + + /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ + @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = 
"") + var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) + + /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ + @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false) + + /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ + @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) + + @Output + @Gather(enabled = false) + private var outputIndex: File = _ + + /** Runs the parent hook, then appends to `deps` the VCF index file of `variant` (when non-null), of snpEffFile and dbsnp (when set) and of every non-null `comp` / `resource` entry whose name does not end in ".list"; finally sets `outputIndex` to the index of `out` unless `out` is null or IOUtils.isSpecialFile(out). */ override def beforeGraph(): Unit = { + super.beforeGraph() + if (variant != null) + deps :+= VcfUtils.getVcfIndexFile(variant) + snpEffFile.foreach(deps :+= VcfUtils.getVcfIndexFile(_)) + dbsnp.foreach(deps :+= VcfUtils.getVcfIndexFile(_)) + deps ++= comp.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => VcfUtils.getVcfIndexFile(orig)) + deps ++= resource.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => VcfUtils.getVcfIndexFile(orig)) + if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out)) + outputIndex = VcfUtils.getVcfIndexFile(out) + } + + override def cmdLine = super.cmdLine + + required(TaggedFile.formatCommandLineParameter("-V", variant), variant, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-snpEffFile", snpEffFile), 
snpEffFile, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-D", dbsnp), dbsnp, spaceSeparated = true, escape = true, format = "%s") + + repeat("-comp", comp, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + + repeat("-resource", resource, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + + repeat("-A", annotation, spaceSeparated = true, escape = true, format = "%s") + + repeat("-XA", excludeAnnotation, spaceSeparated = true, escape = true, format = "%s") + + repeat("-G", group, spaceSeparated = true, escape = true, format = "%s") + + repeat("-E", expression, spaceSeparated = true, escape = true, format = "%s") + + conditional(resourceAlleleConcordance, "-rac", escape = true, format = "%s") + + conditional(useAllAnnotations, "-all", escape = true, format = "%s") + + conditional(alwaysAppendDbsnpId, "-alwaysAppendDbsnpId", escape = true, format = "%s") + + optional("-mvq", MendelViolationGenotypeQualityThreshold, spaceSeparated = true, escape = true, format = MendelViolationGenotypeQualityThresholdFormat) + + conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + + conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s") +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala new file mode 100644 index 0000000000000000000000000000000000000000..cf1c362c7dd06b2eec25104fdfd3fb5a06dea2f8 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala @@ -0,0 +1,161 @@ +package 
nl.lumc.sasc.biopet.extensions.gatk + +import java.io.File + +import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile +import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, Output } + +class VariantEval(val root: Configurable) extends CommandLineGATK { + def analysis_type = "VariantEval" + + /** An output file created by the walker. Will overwrite contents if file exists */ + @Output(fullName = "out", shortName = "o", doc = "An output file created by the walker. Will overwrite contents if file exists", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) + var out: File = _ + + /** Input evaluation file(s) */ + @Input(fullName = "eval", shortName = "eval", doc = "Input evaluation file(s)", required = true, exclusiveOf = "", validation = "") + var eval: Seq[File] = Nil + + /** Input comparison file(s) */ + @Input(fullName = "comp", shortName = "comp", doc = "Input comparison file(s)", required = false, exclusiveOf = "", validation = "") + var comp: Seq[File] = Nil + + /** dbSNP file */ + @Input(fullName = "dbsnp", shortName = "D", doc = "dbSNP file", required = false, exclusiveOf = "", validation = "") + var dbsnp: Option[File] = config("dbsnp") + + /** Evaluations that count calls at sites of true variation (e.g., indel calls) will use this argument as their gold standard for comparison */ + @Input(fullName = "goldStandard", shortName = "gold", doc = "Evaluations that count calls at sites of true variation (e.g., indel calls) will use this argument as their gold standard for comparison", required = false, exclusiveOf = "", validation = "") + var goldStandard: Option[File] = config("goldStandard") + + /** One or more stratifications to use when evaluating the data */ + @Argument(fullName = "select_exps", shortName = "select", doc = "One or more 
stratifications to use when evaluating the data", required = false, exclusiveOf = "", validation = "") + var select_exps: List[String] = config("select_exps", default = Nil) + + /** Names to use for the list of stratifications (must be a 1-to-1 mapping) */ + @Argument(fullName = "select_names", shortName = "selectName", doc = "Names to use for the list of stratifications (must be a 1-to-1 mapping)", required = false, exclusiveOf = "", validation = "") + var select_names: List[String] = config("select_names", default = Nil) + + /** Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context */ + @Argument(fullName = "sample", shortName = "sn", doc = "Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context", required = false, exclusiveOf = "", validation = "") + var sample: List[String] = config("sample", default = Nil, freeVar = false) + + /** Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets */ + @Argument(fullName = "known_names", shortName = "knownName", doc = "Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required = false, exclusiveOf = "", validation = "") + var known_names: List[String] = config("known_names", default = Nil) + + /** One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless -noST is specified) */ + @Argument(fullName = "stratificationModule", shortName = "ST", doc = "One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless -noS is specified)", required = false, exclusiveOf = "", validation = "") + var stratificationModule: List[String] = config("stratificationModule", default = Nil) + + /** Do not use the standard
stratification modules by default (instead, only those that are specified with the -ST option) */ + @Argument(fullName = "doNotUseAllStandardStratifications", shortName = "noST", doc = "Do not use the standard stratification modules by default (instead, only those that are specified with the -S option)", required = false, exclusiveOf = "", validation = "") + var doNotUseAllStandardStratifications: Boolean = config("doNotUseAllStandardStratifications", default = false) + + /** One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless -noEV is specified) */ + @Argument(fullName = "evalModule", shortName = "EV", doc = "One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless -noEV is specified)", required = false, exclusiveOf = "", validation = "") + var evalModule: List[String] = config("evalModule", default = Nil) + + /** Do not use the standard modules by default (instead, only those that are specified with the -EV option) */ + @Argument(fullName = "doNotUseAllStandardModules", shortName = "noEV", doc = "Do not use the standard modules by default (instead, only those that are specified with the -EV option)", required = false, exclusiveOf = "", validation = "") + var doNotUseAllStandardModules: Boolean = config("doNotUseAllStandardModules", default = false) + + /** Minimum phasing quality */ + @Argument(fullName = "minPhaseQuality", shortName = "mpq", doc = "Minimum phasing quality", required = false, exclusiveOf = "", validation = "") + var minPhaseQuality: Option[Double] = config("minPhaseQuality") + + /** Format string for minPhaseQuality */ + @Argument(fullName = "minPhaseQualityFormat", shortName = "", doc = "Format string for minPhaseQuality", required = false, exclusiveOf = "", validation = "") + var minPhaseQualityFormat: String = "%s" + + /** Minimum genotype QUAL score for each trio member required to accept a site as a violation. Default is 50.
*/ + @Argument(fullName = "mendelianViolationQualThreshold", shortName = "mvq", doc = "Minimum genotype QUAL score for each trio member required to accept a site as a violation. Default is 50.", required = false, exclusiveOf = "", validation = "") + var mendelianViolationQualThreshold: Option[Double] = config("mendelianViolationQualThreshold") + + /** Format string for mendelianViolationQualThreshold */ + @Argument(fullName = "mendelianViolationQualThresholdFormat", shortName = "", doc = "Format string for mendelianViolationQualThreshold", required = false, exclusiveOf = "", validation = "") + var mendelianViolationQualThresholdFormat: String = "%s" + + /** Per-sample ploidy (number of chromosomes per sample) */ + @Argument(fullName = "samplePloidy", shortName = "ploidy", doc = "Per-sample ploidy (number of chromosomes per sample)", required = false, exclusiveOf = "", validation = "") + var samplePloidy: Option[Int] = config("samplePloidy") + + /** Fasta file with ancestral alleles */ + @Argument(fullName = "ancestralAlignments", shortName = "aa", doc = "Fasta file with ancestral alleles", required = false, exclusiveOf = "", validation = "") + var ancestralAlignments: Option[File] = config("ancestralAlignments") + + /** If provided only comp and eval tracks with exactly matching reference and alternate alleles will be counted as overlapping */ + @Argument(fullName = "requireStrictAlleleMatch", shortName = "strict", doc = "If provided only comp and eval tracks with exactly matching reference and alternate alleles will be counted as overlapping", required = false, exclusiveOf = "", validation = "") + var requireStrictAlleleMatch: Boolean = config("requireStrictAlleleMatch", default = false) + + /** If provided, modules that track polymorphic sites will not require that a site have AC > 0 when the input eval has genotypes */ + @Argument(fullName = "keepAC0", shortName = "keepAC0", doc = "If provided, modules that track polymorphic sites will not require that a site 
have AC > 0 when the input eval has genotypes", required = false, exclusiveOf = "", validation = "") + var keepAC0: Boolean = config("keepAC0", default = false) + + /** Optional integer read from config and passed to GATK as -numSamples when set. NOTE(review): the doc string on the annotation below is word-for-word the keepAC0 description and looks copy-pasted; confirm the intended meaning against the GATK VariantEval documentation */ + @Argument(fullName = "numSamples", shortName = "numSamples", doc = "If provided, modules that track polymorphic sites will not require that a site have AC > 0 when the input eval has genotypes", required = false, exclusiveOf = "", validation = "") + var numSamples: Option[Int] = config("numSamples") + + /** If provided, all -eval tracks will be merged into a single eval track */ + @Argument(fullName = "mergeEvals", shortName = "mergeEvals", doc = "If provided, all -eval tracks will be merged into a single eval track", required = false, exclusiveOf = "", validation = "") + var mergeEvals: Boolean = config("mergeEvals", default = false) + + /** File containing tribble-readable features for the IntervalStratification */ + @Input(fullName = "stratIntervals", shortName = "stratIntervals", doc = "File containing tribble-readable features for the IntervalStratificiation", required = false, exclusiveOf = "", validation = "") + var stratIntervals: Option[File] = config("stratIntervals") + + /** File containing tribble-readable features describing a known list of copy number variants */ + @Input(fullName = "knownCNVs", shortName = "knownCNVs", doc = "File containing tribble-readable features describing a known list of copy number variants", required = false, exclusiveOf = "", validation = "") + var knownCNVs: Option[File] = config("knownCNVs") + + /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ + @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var
filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) + + /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ + @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false) + + /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ + @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) + + override def beforeGraph() { + super.beforeGraph() + deps ++= eval.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => VcfUtils.getVcfIndexFile(orig)) + deps ++= comp.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => VcfUtils.getVcfIndexFile(orig)) + dbsnp.foreach(deps :+= VcfUtils.getVcfIndexFile(_)) + goldStandard.foreach(deps :+= VcfUtils.getVcfIndexFile(_)) + } + + override def cmdLine = super.cmdLine + + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + + repeat("-eval", eval, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + + repeat("-comp", comp, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-D", dbsnp), dbsnp, spaceSeparated = true, escape = true, format = "%s") + + 
optional(TaggedFile.formatCommandLineParameter("-gold", goldStandard), goldStandard, spaceSeparated = true, escape = true, format = "%s") + + repeat("-select", select_exps, spaceSeparated = true, escape = true, format = "%s") + + repeat("-selectName", select_names, spaceSeparated = true, escape = true, format = "%s") + + repeat("-sn", sample, spaceSeparated = true, escape = true, format = "%s") + + repeat("-knownName", known_names, spaceSeparated = true, escape = true, format = "%s") + + repeat("-ST", stratificationModule, spaceSeparated = true, escape = true, format = "%s") + + conditional(doNotUseAllStandardStratifications, "-noST", escape = true, format = "%s") + + repeat("-EV", evalModule, spaceSeparated = true, escape = true, format = "%s") + + conditional(doNotUseAllStandardModules, "-noEV", escape = true, format = "%s") + + optional("-mpq", minPhaseQuality, spaceSeparated = true, escape = true, format = minPhaseQualityFormat) + + optional("-mvq", mendelianViolationQualThreshold, spaceSeparated = true, escape = true, format = mendelianViolationQualThresholdFormat) + + optional("-ploidy", samplePloidy, spaceSeparated = true, escape = true, format = "%s") + + optional("-aa", ancestralAlignments, spaceSeparated = true, escape = true, format = "%s") + + conditional(requireStrictAlleleMatch, "-strict", escape = true, format = "%s") + + conditional(keepAC0, "-keepAC0", escape = true, format = "%s") + + optional("-numSamples", numSamples, spaceSeparated = true, escape = true, format = "%s") + + conditional(mergeEvals, "-mergeEvals", escape = true, format = "%s") + + optional("-stratIntervals", stratIntervals, spaceSeparated = true, escape = true, format = "%s") + + optional("-knownCNVs", knownCNVs, spaceSeparated = true, escape = true, format = "%s") + + conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + + 
conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s") +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantRecalibrator.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantRecalibrator.scala new file mode 100644 index 0000000000000000000000000000000000000000..96b5ee4c00fc1350e6298f31d11c2308010b6e23 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantRecalibrator.scala @@ -0,0 +1,213 @@ +package nl.lumc.sasc.biopet.extensions.gatk + +import java.io.File + +import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile +import org.broadinstitute.gatk.utils.commandline.{ Gather, Input, Output, _ } + +class VariantRecalibrator(val root: Configurable) extends CommandLineGATK { + def analysis_type = "VariantRecalibrator" + + /** Recalibration mode to employ */ + @Argument(fullName = "mode", shortName = "mode", doc = "Recalibration mode to employ", required = true, exclusiveOf = "", validation = "") + var mode: String = _ + + /** Max number of Gaussians for the positive model */ + @Argument(fullName = "maxGaussians", shortName = "mG", doc = "Max number of Gaussians for the positive model", required = false, exclusiveOf = "", validation = "") + var maxGaussians: Option[Int] = config("maxGaussians") + + /** Max number of Gaussians for the negative model */ + @Argument(fullName = "maxNegativeGaussians", shortName = "mNG", doc = "Max number of Gaussians for the negative model", required = false, exclusiveOf = "", validation = "") + var maxNegativeGaussians: Option[Int] = config("maxNegativeGaussians") + + /** Maximum number of VBEM iterations */ + @Argument(fullName = "maxIterations", shortName = "mI", doc = "Maximum number of VBEM iterations", required = false, exclusiveOf = "", validation = "") + var maxIterations: Option[Int] = 
config("maxIterations") + + /** Number of k-means iterations */ + @Argument(fullName = "numKMeans", shortName = "nKM", doc = "Number of k-means iterations", required = false, exclusiveOf = "", validation = "") + var numKMeans: Option[Int] = config("numKMeans") + + /** Annotation value divergence threshold (number of standard deviations from the means) */ + @Argument(fullName = "stdThreshold", shortName = "std", doc = "Annotation value divergence threshold (number of standard deviations from the means) ", required = false, exclusiveOf = "", validation = "") + var stdThreshold: Option[Double] = config("stdThreshold") + + /** Format string for stdThreshold */ + @Argument(fullName = "stdThresholdFormat", shortName = "", doc = "Format string for stdThreshold", required = false, exclusiveOf = "", validation = "") + var stdThresholdFormat: String = "%s" + + /** The shrinkage parameter in the variational Bayes algorithm. */ + @Argument(fullName = "shrinkage", shortName = "shrinkage", doc = "The shrinkage parameter in the variational Bayes algorithm.", required = false, exclusiveOf = "", validation = "") + var shrinkage: Option[Double] = config("shrinkage") + + /** Format string for shrinkage */ + @Argument(fullName = "shrinkageFormat", shortName = "", doc = "Format string for shrinkage", required = false, exclusiveOf = "", validation = "") + var shrinkageFormat: String = "%s" + + /** The dirichlet parameter in the variational Bayes algorithm. 
*/ + @Argument(fullName = "dirichlet", shortName = "dirichlet", doc = "The dirichlet parameter in the variational Bayes algorithm.", required = false, exclusiveOf = "", validation = "") + var dirichlet: Option[Double] = config("dirichlet") + + /** Format string for dirichlet */ + @Argument(fullName = "dirichletFormat", shortName = "", doc = "Format string for dirichlet", required = false, exclusiveOf = "", validation = "") + var dirichletFormat: String = "%s" + + /** The number of prior counts to use in the variational Bayes algorithm. */ + @Argument(fullName = "priorCounts", shortName = "priorCounts", doc = "The number of prior counts to use in the variational Bayes algorithm.", required = false, exclusiveOf = "", validation = "") + var priorCounts: Option[Double] = config("priorCounts") + + /** Format string for priorCounts */ + @Argument(fullName = "priorCountsFormat", shortName = "", doc = "Format string for priorCounts", required = false, exclusiveOf = "", validation = "") + var priorCountsFormat: String = "%s" + + /** Maximum number of training data */ + @Argument(fullName = "maxNumTrainingData", shortName = "maxNumTrainingData", doc = "Maximum number of training data", required = false, exclusiveOf = "", validation = "") + var maxNumTrainingData: Option[Int] = config("maxNumTrainingData") + + /** Minimum number of bad variants */ + @Argument(fullName = "minNumBadVariants", shortName = "minNumBad", doc = "Minimum number of bad variants", required = false, exclusiveOf = "", validation = "") + var minNumBadVariants: Option[Int] = config("minNumBadVariants") + + /** LOD score cutoff for selecting bad variants */ + @Argument(fullName = "badLodCutoff", shortName = "badLodCutoff", doc = "LOD score cutoff for selecting bad variants", required = false, exclusiveOf = "", validation = "") + var badLodCutoff: Option[Double] = config("badLodCutoff") + + /** Format string for badLodCutoff */ + @Argument(fullName = "badLodCutoffFormat", shortName = "", doc = "Format string 
for badLodCutoff", required = false, exclusiveOf = "", validation = "") + var badLodCutoffFormat: String = "%s" + + /** Apply logit transform and jitter to MQ values */ + @Argument(fullName = "MQCapForLogitJitterTransform", shortName = "MQCap", doc = "Apply logit transform and jitter to MQ values", required = false, exclusiveOf = "", validation = "") + var MQCapForLogitJitterTransform: Option[Int] = config("MQCapForLogitJitterTransform") + + /** MQ is by default transformed to log[(MQ_cap + epsilon - MQ)/(MQ + epsilon)] to make it more Gaussian-like. Use this flag to not do that. */ + @Argument(fullName = "no_MQ_logit", shortName = "NoMQLogit", doc = "MQ is by default transformed to log[(MQ_cap + epsilon - MQ)/(MQ + epsilon)] to make it more Gaussian-like. Use this flag to not do that.", required = false, exclusiveOf = "", validation = "") + var no_MQ_logit: Boolean = config("no_MQ_logit", default = false) + + /** Amount of jitter (as a factor to a Normal(0,1) noise) to add to the MQ capped values. Read from config key "MQ_jitter" without a default, like the other Option[Double] arguments of this class (a Boolean default is not a valid value for a numeric option). */ + @Argument(fullName = "MQ_jitter", shortName = "MQJitt", doc = "Amount of jitter (as a factor to a Normal(0,1) noise) to add to the MQ capped values", required = false, exclusiveOf = "", validation = "") + var MQ_jitter: Option[Double] = config("MQ_jitter") + + /** Format string for MQ_jitter */ + @Argument(fullName = "MQ_jitterFormat", shortName = "", doc = "Format string for MQ_jitter", required = false, exclusiveOf = "", validation = "") + var MQ_jitterFormat: String = "%s" + + /** The raw input variants to be recalibrated */ + @Input(fullName = "input", shortName = "input", doc = "The raw input variants to be recalibrated", required = true, exclusiveOf = "", validation = "") + var input: Seq[File] = Nil + + /** Additional raw input variants to be used in building the model */ + @Input(fullName = "aggregate", shortName = "aggregate", doc = "Additional raw input variants to be used in building the model", required = false, exclusiveOf = "", 
validation = "") + var aggregate: List[File] = config("aggregate", default = Nil) + + /** A list of sites for which to apply a prior probability of being correct but which aren't used by the algorithm (training and truth sets are required to run) */ + @Input(fullName = "resource", shortName = "resource", doc = "A list of sites for which to apply a prior probability of being correct but which aren't used by the algorithm (training and truth sets are required to run)", required = true, exclusiveOf = "", validation = "") + var resource: List[File] = config("resource", default = Nil) + + /** The output recal file used by ApplyRecalibration */ + @Output(fullName = "recal_file", shortName = "recalFile", doc = "The output recal file used by ApplyRecalibration", required = true, exclusiveOf = "", validation = "") + @Gather(classOf[CatVariantsGatherer]) + var recal_file: File = _ + + /** The output tranches file used by ApplyRecalibration */ + @Output(fullName = "tranches_file", shortName = "tranchesFile", doc = "The output tranches file used by ApplyRecalibration", required = true, exclusiveOf = "", validation = "") + @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) + var tranches_file: File = _ + + /** The expected novel Ti/Tv ratio to use when calculating FDR tranches and for display on the optimization curve output figures. (approx 2.15 for whole genome experiments). ONLY USED FOR PLOTTING PURPOSES! */ + @Argument(fullName = "target_titv", shortName = "titv", doc = "The expected novel Ti/Tv ratio to use when calculating FDR tranches and for display on the optimization curve output figures. (approx 2.15 for whole genome experiments). 
ONLY USED FOR PLOTTING PURPOSES!", required = false, exclusiveOf = "", validation = "") + var target_titv: Option[Double] = config("target_titv") + + /** Format string for target_titv */ + @Argument(fullName = "target_titvFormat", shortName = "", doc = "Format string for target_titv", required = false, exclusiveOf = "", validation = "") + var target_titvFormat: String = "%s" + + /** The names of the annotations which should used for calculations */ + @Argument(fullName = "use_annotation", shortName = "an", doc = "The names of the annotations which should used for calculations", required = true, exclusiveOf = "", validation = "") + var use_annotation: List[String] = config("use_annotation", default = Nil) + + /** The levels of truth sensitivity at which to slice the data. (in percent, that is 1.0 for 1 percent) */ + @Argument(fullName = "TStranche", shortName = "tranche", doc = "The levels of truth sensitivity at which to slice the data. (in percent, that is 1.0 for 1 percent)", required = false, exclusiveOf = "", validation = "") + var TStranche: List[Double] = config("TStranche", default = Nil) + + /** If specified, the variant recalibrator will also use variants marked as filtered by the specified filter name in the input VCF file */ + @Argument(fullName = "ignore_filter", shortName = "ignoreFilter", doc = "If specified, the variant recalibrator will also use variants marked as filtered by the specified filter name in the input VCF file", required = false, exclusiveOf = "", validation = "") + var ignore_filter: List[String] = config("ignore_filter", default = Nil) + + /** If specified, the variant recalibrator will ignore all input filters. Useful to rerun the VQSR from a filtered output file. */ + @Argument(fullName = "ignore_all_filters", shortName = "ignoreAllFilters", doc = "If specified, the variant recalibrator will ignore all input filters. 
Useful to rerun the VQSR from a filtered output file.", required = false, exclusiveOf = "", validation = "") + var ignore_all_filters: Boolean = config("ignore_all_filters", default = false) /* read from config like the other Boolean flags of this class; still defaults to false */ + + /** The output rscript file generated by the VQSR to aid in visualization of the input data and learned model */ + @Output(fullName = "rscript_file", shortName = "rscriptFile", doc = "The output rscript file generated by the VQSR to aid in visualization of the input data and learned model", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) + var rscript_file: File = _ + + /** Used to debug the random number generation inside the VQSR. Do not use. */ + @Argument(fullName = "replicate", shortName = "replicate", doc = "Used to debug the random number generation inside the VQSR. Do not use.", required = false, exclusiveOf = "", validation = "") + var replicate: Option[Int] = config("replicate") + + /** Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation. 
*/ + @Argument(fullName = "trustAllPolymorphic", shortName = "allPoly", doc = "Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation.", required = false, exclusiveOf = "", validation = "") + var trustAllPolymorphic: Boolean = config("trustAllPolymorphic", default = false) + + /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ + @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) + + /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ + @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false) + + /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ + @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. 
'*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) + + @Output + @Gather(enabled = false) + private var outputIndex: File = _ + + override def beforeGraph() { + super.beforeGraph() + deps ++= input.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => VcfUtils.getVcfIndexFile(orig)) + deps ++= aggregate.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => VcfUtils.getVcfIndexFile(orig)) + deps ++= resource.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => VcfUtils.getVcfIndexFile(orig)) + if (recal_file != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(recal_file)) + outputIndex = VcfUtils.getVcfIndexFile(recal_file) + } + + override def cmdLine = super.cmdLine + + required("-mode", mode, spaceSeparated = true, escape = true, format = "%s") + + optional("-mG", maxGaussians, spaceSeparated = true, escape = true, format = "%s") + + optional("-mNG", maxNegativeGaussians, spaceSeparated = true, escape = true, format = "%s") + + optional("-mI", maxIterations, spaceSeparated = true, escape = true, format = "%s") + + optional("-nKM", numKMeans, spaceSeparated = true, escape = true, format = "%s") + + optional("-std", stdThreshold, spaceSeparated = true, escape = true, format = stdThresholdFormat) + + optional("-shrinkage", shrinkage, spaceSeparated = true, escape = true, format = shrinkageFormat) + + optional("-dirichlet", dirichlet, spaceSeparated = true, escape = true, format = dirichletFormat) + + optional("-priorCounts", priorCounts, spaceSeparated = true, escape = true, format = priorCountsFormat) + + optional("-maxNumTrainingData", maxNumTrainingData, spaceSeparated = true, escape = true, format = "%s") + + optional("-minNumBad", minNumBadVariants, spaceSeparated = true, escape = true, format = "%s") + + 
optional("-badLodCutoff", badLodCutoff, spaceSeparated = true, escape = true, format = badLodCutoffFormat) + + optional("-MQCap", MQCapForLogitJitterTransform, spaceSeparated = true, escape = true, format = "%s") + + conditional(no_MQ_logit, "-NoMQLogit", escape = true, format = "%s") + + optional("-MQJitt", MQ_jitter, spaceSeparated = true, escape = true, format = MQ_jitterFormat) + + repeat("-input", input, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + + repeat("-aggregate", aggregate, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + + repeat("-resource", resource, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + + required("-recalFile", recal_file, spaceSeparated = true, escape = true, format = "%s") + + required("-tranchesFile", tranches_file, spaceSeparated = true, escape = true, format = "%s") + + optional("-titv", target_titv, spaceSeparated = true, escape = true, format = target_titvFormat) + + repeat("-an", use_annotation, spaceSeparated = true, escape = true, format = "%s") + + repeat("-tranche", TStranche, spaceSeparated = true, escape = true, format = "%s") + + repeat("-ignoreFilter", ignore_filter, spaceSeparated = true, escape = true, format = "%s") + + conditional(ignore_all_filters, "-ignoreAllFilters", escape = true, format = "%s") + + optional("-rscriptFile", rscript_file, spaceSeparated = true, escape = true, format = "%s") + + optional("-replicate", replicate, spaceSeparated = true, escape = true, format = "%s") + + conditional(trustAllPolymorphic, "-allPoly", escape = true, format = "%s") + + conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + + conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s") +} diff 
--git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gmap/GmapBuild.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gmap/GmapBuild.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gmap/GmapBuild.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gmap/GmapBuild.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gmap/Gsnap.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gmap/Gsnap.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gmap/Gsnap.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gmap/Gsnap.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVTools.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVTools.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVTools.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVTools.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVToolsCount.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVToolsCount.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVToolsCount.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVToolsCount.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/Kraken.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/Kraken.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/Kraken.scala rename to 
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/Kraken.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/KrakenReport.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/KrakenReport.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/KrakenReport.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/KrakenReport.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/macs2/Macs2.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/macs2/Macs2.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/macs2/Macs2.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/macs2/Macs2.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/macs2/Macs2CallPeak.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/macs2/Macs2CallPeak.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/macs2/Macs2CallPeak.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/macs2/Macs2CallPeak.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/Manwe.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/Manwe.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/Manwe.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/Manwe.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweAnnotateBed.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweAnnotateBed.scala similarity index 100% rename from 
public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweAnnotateBed.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweAnnotateBed.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweAnnotateVcf.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweAnnotateVcf.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweAnnotateVcf.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweAnnotateVcf.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesAnnotate.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesAnnotate.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesAnnotate.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesAnnotate.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesDownload.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesDownload.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesDownload.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesDownload.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesList.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesList.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesList.scala rename to 
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesList.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesShow.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesShow.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesShow.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesShow.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesActivate.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesActivate.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesActivate.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesActivate.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesAdd.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesAdd.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesAdd.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesAdd.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesAnnotateVariations.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesAnnotateVariations.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesAnnotateVariations.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesAnnotateVariations.scala diff --git 
a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImport.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImport.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImport.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImport.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImportBed.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImportBed.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImportBed.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImportBed.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImportVcf.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImportVcf.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImportVcf.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImportVcf.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesList.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesList.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesList.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesList.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesShow.scala 
b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesShow.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesShow.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesShow.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/AddOrReplaceReadGroups.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/AddOrReplaceReadGroups.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/AddOrReplaceReadGroups.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/AddOrReplaceReadGroups.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/BedToIntervalList.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/BedToIntervalList.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/BedToIntervalList.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/BedToIntervalList.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/BuildBamIndex.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/BuildBamIndex.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/BuildBamIndex.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/BuildBamIndex.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CalculateHsMetrics.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CalculateHsMetrics.scala similarity index 100% rename from 
public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CalculateHsMetrics.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CalculateHsMetrics.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectMultipleMetrics.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectMultipleMetrics.scala similarity index 99% rename from 
public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectMultipleMetrics.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectMultipleMetrics.scala index fed464a7391ad3f618e3755c2b8addc3cfb608b1..f39300d8d0a4dbd57f80d373a7592f41b76bb45e 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectMultipleMetrics.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectMultipleMetrics.scala @@ -54,6 +54,9 @@ class CollectMultipleMetrics(val root: Configurable) extends Picard with Summari @Argument(doc = "Stop after processing N reads", required = false) var stopAfter: Option[Long] = config("stop_after") + @Output + private var outputFiles: List[File] = Nil + override def beforeGraph(): Unit = { super.beforeGraph() if (reference == null) reference = referenceFasta() diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectRnaSeqMetrics.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectRnaSeqMetrics.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectRnaSeqMetrics.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectRnaSeqMetrics.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectTargetedPcrMetrics.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectTargetedPcrMetrics.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectTargetedPcrMetrics.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectTargetedPcrMetrics.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectWgsMetrics.scala 
b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectWgsMetrics.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectWgsMetrics.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectWgsMetrics.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CreateSequenceDictionary.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CreateSequenceDictionary.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CreateSequenceDictionary.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CreateSequenceDictionary.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/GatherBamFiles.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/GatherBamFiles.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/GatherBamFiles.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/GatherBamFiles.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicates.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicates.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicates.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicates.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala similarity index 100% rename from 
public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/Picard.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/Picard.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/Picard.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/Picard.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/ReorderSam.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/ReorderSam.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/ReorderSam.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/ReorderSam.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SamToFastq.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SamToFastq.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SamToFastq.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SamToFastq.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SortSam.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SortSam.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SortSam.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SortSam.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SortVcf.scala 
b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SortVcf.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SortVcf.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SortVcf.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelVCF.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelVCF.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelVCF.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelVCF.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/AssignTaxonomy.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/AssignTaxonomy.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/AssignTaxonomy.scala rename to 
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/AssignTaxonomy.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/MergeOtuMaps.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/MergeOtuMaps.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/MergeOtuMaps.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/MergeOtuMaps.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/MergeOtuTables.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/MergeOtuTables.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/MergeOtuTables.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/MergeOtuTables.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/PickClosedReferenceOtus.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/PickClosedReferenceOtus.scala similarity index 95% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/PickClosedReferenceOtus.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/PickClosedReferenceOtus.scala index 07fef8aef1ee831aa258555b2358660a396dea69..16c65021d0a33d5ab89915c17211b959782aaa56 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/PickClosedReferenceOtus.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/PickClosedReferenceOtus.scala @@ -19,7 +19,7 @@ import java.io.File import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Version } import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.Input +import 
org.broadinstitute.gatk.utils.commandline.{ Input, Output } /** * Created by pjvan_thof on 12/4/15. @@ -54,6 +54,9 @@ class PickClosedReferenceOtus(val root: Configurable) extends BiopetCommandLineF def otuTable = new File(outputDir, "otu_table.biom") def otuMap = new File(outputDir, "uclust_ref_picked_otus" + File.separator + "seqs_otus.txt") + @Output + private var outputFiles: List[File] = Nil + override def beforeGraph(): Unit = { super.beforeGraph() jobOutputFile = new File(outputDir, ".std.out") diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/PickOtus.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/PickOtus.scala similarity index 97% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/PickOtus.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/PickOtus.scala index 744c52e29c8f1f9a53c166648d09cad6ae25f3a4..d0a0b235516bc9793398a8ee79023d69066cffdd 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/PickOtus.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/PickOtus.scala @@ -17,9 +17,9 @@ package nl.lumc.sasc.biopet.extensions.qiime import java.io.File -import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction } +import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Version } import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.Input +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } /** * Created by pjvan_thof on 12/4/15. 
@@ -98,6 +98,9 @@ class PickOtus(val root: Configurable) extends BiopetCommandLineFunction with Ve def logFile = new File(outputDir, s"${name}_otus.log") def otusTxt = new File(outputDir, s"${name}_otus.txt") + @Output + private var outputFiles: List[File] = Nil + override def beforeGraph(): Unit = { super.beforeGraph() outputFiles :+= clustersFile diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/PickRepSet.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/PickRepSet.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/PickRepSet.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/PickRepSet.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/SplitLibrariesFastq.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/SplitLibrariesFastq.scala similarity index 94% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/SplitLibrariesFastq.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/SplitLibrariesFastq.scala index d9295dee037381a4ff0504f0a7bf465cebd0f6b7..ce159b70ff3424a0eb0777f3995e4952a5ac562c 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/SplitLibrariesFastq.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/SplitLibrariesFastq.scala @@ -17,9 +17,9 @@ package nl.lumc.sasc.biopet.extensions.qiime import java.io.File -import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction } +import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Version } import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.Input +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } /** * Created by pjvan_thof on 12/10/15. 
@@ -56,13 +56,16 @@ class SplitLibrariesFastq(val root: Configurable) extends BiopetCommandLineFunct def outputSeqs = new File(outputDir, "seqs.fna") + @Output + private var _outputSeqs: File = _ + override def defaultCoreMemory = 4.0 override def beforeGraph(): Unit = { super.beforeGraph() require(input.nonEmpty) require(outputDir != null) - outputFiles :+= outputSeqs + _outputSeqs = outputSeqs } def cmdLine = executable + diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaFlagstat.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaFlagstat.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaFlagstat.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaFlagstat.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaIndex.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaIndex.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaIndex.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaIndex.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMarkdup.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMarkdup.scala similarity index 100% rename 
from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMarkdup.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMarkdup.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMerge.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMerge.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMerge.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMerge.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaView.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaView.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaView.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaView.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/FixMpileup.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/FixMpileup.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/FixMpileup.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/FixMpileup.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/Samtools.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/Samtools.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/Samtools.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/Samtools.scala diff --git 
a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsFaidx.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsFaidx.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsFaidx.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsFaidx.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsFlagstat.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsFlagstat.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsFlagstat.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsFlagstat.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsMpileup.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsMpileup.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsMpileup.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsMpileup.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsSort.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsSort.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsSort.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsSort.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsView.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsView.scala 
similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsView.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsView.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/seqtk/Seqtk.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/seqtk/Seqtk.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/seqtk/Seqtk.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/seqtk/Seqtk.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/seqtk/SeqtkSeq.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/seqtk/SeqtkSeq.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/seqtk/SeqtkSeq.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/seqtk/SeqtkSeq.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/FixMpileup.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/FixMpileup.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/FixMpileup.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/FixMpileup.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Varscan.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Varscan.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Varscan.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Varscan.scala diff --git 
a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/VarscanMpileup2cns.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/VarscanMpileup2cns.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/VarscanMpileup2cns.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/VarscanMpileup2cns.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/Vt.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/Vt.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/Vt.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/Vt.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/VtDecompose.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/VtDecompose.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/VtDecompose.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/VtDecompose.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/VtNormalize.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/VtNormalize.scala similarity index 100% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/VtNormalize.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/VtNormalize.scala diff --git a/public/biopet-extensions/src/test/resources/log4j.properties b/biopet-extensions/src/test/resources/log4j.properties similarity index 100% rename from public/biopet-extensions/src/test/resources/log4j.properties rename to biopet-extensions/src/test/resources/log4j.properties diff --git 
a/public/biopet-extensions/src/test/resources/nl/lumc/sasc/biopet/extensions/picard/picard.alignmentMetrics b/biopet-extensions/src/test/resources/nl/lumc/sasc/biopet/extensions/picard/picard.alignmentMetrics similarity index 100% rename from public/biopet-extensions/src/test/resources/nl/lumc/sasc/biopet/extensions/picard/picard.alignmentMetrics rename to biopet-extensions/src/test/resources/nl/lumc/sasc/biopet/extensions/picard/picard.alignmentMetrics diff --git a/public/biopet-extensions/src/test/resources/nl/lumc/sasc/biopet/extensions/picard/picard.dedup.metrics b/biopet-extensions/src/test/resources/nl/lumc/sasc/biopet/extensions/picard/picard.dedup.metrics similarity index 100% rename from public/biopet-extensions/src/test/resources/nl/lumc/sasc/biopet/extensions/picard/picard.dedup.metrics rename to biopet-extensions/src/test/resources/nl/lumc/sasc/biopet/extensions/picard/picard.dedup.metrics diff --git a/public/biopet-extensions/src/test/resources/nl/lumc/sasc/biopet/extensions/picard/picard.insertsizemetrics b/biopet-extensions/src/test/resources/nl/lumc/sasc/biopet/extensions/picard/picard.insertsizemetrics similarity index 100% rename from public/biopet-extensions/src/test/resources/nl/lumc/sasc/biopet/extensions/picard/picard.insertsizemetrics rename to biopet-extensions/src/test/resources/nl/lumc/sasc/biopet/extensions/picard/picard.insertsizemetrics diff --git a/public/biopet-extensions/src/test/resources/vep.metrics b/biopet-extensions/src/test/resources/vep.metrics similarity index 100% rename from public/biopet-extensions/src/test/resources/vep.metrics rename to biopet-extensions/src/test/resources/vep.metrics diff --git a/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/BcfToolsTest.scala b/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/BcfToolsTest.scala similarity index 100% rename from public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/BcfToolsTest.scala rename to 
biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/BcfToolsTest.scala diff --git a/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/GsnapTest.scala b/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/GsnapTest.scala similarity index 100% rename from public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/GsnapTest.scala rename to biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/GsnapTest.scala diff --git a/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/HtseqCountTest.scala b/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/HtseqCountTest.scala similarity index 100% rename from public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/HtseqCountTest.scala rename to biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/HtseqCountTest.scala diff --git a/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/LnTest.scala b/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/LnTest.scala similarity index 100% rename from public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/LnTest.scala rename to biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/LnTest.scala diff --git a/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/ManweTest.scala b/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/ManweTest.scala similarity index 100% rename from public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/ManweTest.scala rename to biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/ManweTest.scala diff --git a/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictorTest.scala b/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictorTest.scala similarity index 100% rename from 
public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictorTest.scala rename to biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictorTest.scala diff --git a/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetricsTest.scala b/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetricsTest.scala similarity index 100% rename from public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetricsTest.scala rename to biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetricsTest.scala diff --git a/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetricsTest.scala b/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetricsTest.scala similarity index 100% rename from public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetricsTest.scala rename to biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetricsTest.scala diff --git a/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicatesTest.scala b/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicatesTest.scala similarity index 100% rename from public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicatesTest.scala rename to biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicatesTest.scala diff --git a/public/bam2wig/.gitignore b/biopet-package/.gitignore similarity index 100% rename from public/bam2wig/.gitignore rename to biopet-package/.gitignore diff --git a/public/biopet-public-package/pom.xml b/biopet-package/pom.xml similarity index 97% rename from public/biopet-public-package/pom.xml 
rename to biopet-package/pom.xml index 2272ea90129dd466c9eeaf09dad4a244b2a97965..9aa9e059fac9138178e193b0c8a60eca3dd12c63 100644 --- a/public/biopet-public-package/pom.xml +++ b/biopet-package/pom.xml @@ -19,7 +19,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> - <artifactId>BiopetPublicPackage</artifactId> + <artifactId>BiopetPackage</artifactId> <packaging>jar</packaging> <parent> @@ -30,12 +30,12 @@ </parent> <inceptionYear>2014</inceptionYear> - <name>BiopetPublicPackage</name> + <name>BiopetPackage</name> <url>http://maven.apache.org</url> <properties> <sting.shade.phase>package</sting.shade.phase> - <app.main.class>nl.lumc.sasc.biopet.BiopetExecutablePublic</app.main.class> + <app.main.class>nl.lumc.sasc.biopet.BiopetExecutableMain</app.main.class> </properties> <dependencies> diff --git a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala b/biopet-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableMain.scala similarity index 81% rename from public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala rename to biopet-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableMain.scala index 8b21fd806b6ed32cf81d02f7adce43bad41e2562..3fe224735e6541f5b8280a1e56bd11572f4d3ffe 100644 --- a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala +++ b/biopet-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableMain.scala @@ -15,11 +15,10 @@ */ package nl.lumc.sasc.biopet -import nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcalling import nl.lumc.sasc.biopet.utils.{ BiopetExecutable, MainCommand } -object BiopetExecutablePublic extends BiopetExecutable { - def publicPipelines: List[MainCommand] = List( +object BiopetExecutableMain extends BiopetExecutable { + def pipelines: List[MainCommand] = List( 
nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep, nl.lumc.sasc.biopet.pipelines.mapping.Mapping, nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMapping, @@ -33,14 +32,11 @@ object BiopetExecutablePublic extends BiopetExecutable { nl.lumc.sasc.biopet.pipelines.shiva.ShivaSvCalling, nl.lumc.sasc.biopet.pipelines.gears.GearsSingle, nl.lumc.sasc.biopet.pipelines.gears.Gears, - nl.lumc.sasc.biopet.pipelines.gwastest.GwasTest + nl.lumc.sasc.biopet.pipelines.gwastest.GwasTest, + nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcalling, + nl.lumc.sasc.biopet.pipelines.basty.Basty, + nl.lumc.sasc.biopet.pipelines.shiva.Shiva ) - def pipelines: List[MainCommand] = List( - nl.lumc.sasc.biopet.pipelines.shiva.Shiva, - ShivaVariantcalling, - nl.lumc.sasc.biopet.pipelines.basty.Basty - ) ::: publicPipelines - def tools: List[MainCommand] = BiopetToolsExecutable.tools } diff --git a/public/biopet-public-package/src/test/resources/log4j.properties b/biopet-package/src/test/resources/log4j.properties similarity index 100% rename from public/biopet-public-package/src/test/resources/log4j.properties rename to biopet-package/src/test/resources/log4j.properties diff --git a/public/biopet-tools-extensions/pom.xml b/biopet-tools-extensions/pom.xml similarity index 100% rename from public/biopet-tools-extensions/pom.xml rename to biopet-tools-extensions/pom.xml diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BaseCounter.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BaseCounter.scala similarity index 99% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BaseCounter.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BaseCounter.scala index 751053b55bac3e60c09e0b3c0566648cbe997f49..5b23f6d047df7d13f0f2c492720a4a332c8b5b87 100644 --- 
a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BaseCounter.scala +++ b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BaseCounter.scala @@ -75,6 +75,9 @@ class BaseCounter(val root: Configurable) extends ToolCommandFunction { def strandedSenseMetaExonCounts = new File(outputDir, s"$prefix.base.metaexons.stranded.sense.counts") def strandedAntiSenseMetaExonCounts = new File(outputDir, s"$prefix.base.metaexons.stranded.antisense.counts") + @Output + private var outputFiles: List[File] = Nil + override def beforeGraph(): Unit = { outputFiles ++= List(transcriptTotalCounts, transcriptTotalSenseCounts, transcriptTotalAntiSenseCounts, transcriptExonicCounts, transcriptExonicSenseCounts, transcriptExonicAntiSenseCounts, diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BastyGenerateFasta.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BastyGenerateFasta.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BastyGenerateFasta.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BastyGenerateFasta.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BedToInterval.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BedToInterval.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BedToInterval.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BedToInterval.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BedtoolsCoverageToCounts.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BedtoolsCoverageToCounts.scala similarity index 100% rename from 
public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BedtoolsCoverageToCounts.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BedtoolsCoverageToCounts.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BiopetFlagstat.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BiopetFlagstat.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BiopetFlagstat.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BiopetFlagstat.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/FastqSplitter.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/FastqSplitter.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/FastqSplitter.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/FastqSplitter.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/FastqSync.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/FastqSync.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/FastqSync.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/FastqSync.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/GensToVcf.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/GensToVcf.scala similarity index 90% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/GensToVcf.scala rename to 
biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/GensToVcf.scala index 10e5eca751baceeb659e3f602efa0fdc5093b9d0..8ceed9500621b7659b9907a66b6892532ea1137a 100644 --- a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/GensToVcf.scala +++ b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/GensToVcf.scala @@ -18,9 +18,9 @@ package nl.lumc.sasc.biopet.extensions.tools import java.io.File import nl.lumc.sasc.biopet.core.{ Reference, ToolCommandFunction } -import nl.lumc.sasc.biopet.utils.Logging +import nl.lumc.sasc.biopet.utils.{ Logging, VcfUtils } import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Output, Input } +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } /** * @@ -49,11 +49,14 @@ class GensToVcf(val root: Configurable) extends ToolCommandFunction with Referen override def defaultCoreMemory = 6.0 + @Output + private var outputIndex: File = _ + override def beforeGraph(): Unit = { super.beforeGraph() if (reference == null) reference = referenceFasta() if (contig == null) throw new IllegalStateException("Contig is missing") - if (outputVcf.getName.endsWith(".vcf.gz")) outputFiles :+= new File(outputVcf.getAbsolutePath + ".tbi") + outputIndex = VcfUtils.getVcfIndexFile(outputVcf) } override def setupRetry(): Unit = { diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/GvcfToBed.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/GvcfToBed.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/GvcfToBed.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/GvcfToBed.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/KrakenReportToJson.scala 
b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/KrakenReportToJson.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/KrakenReportToJson.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/KrakenReportToJson.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeAlleles.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeAlleles.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeAlleles.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeAlleles.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeOtuMaps.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeOtuMaps.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeOtuMaps.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeOtuMaps.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeTables.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeTables.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeTables.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeTables.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MpileupToVcf.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MpileupToVcf.scala similarity index 100% rename from 
public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MpileupToVcf.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MpileupToVcf.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/PrefixFastq.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/PrefixFastq.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/PrefixFastq.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/PrefixFastq.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SageCountFastq.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SageCountFastq.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SageCountFastq.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SageCountFastq.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SageCreateLibrary.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SageCreateLibrary.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SageCreateLibrary.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SageCreateLibrary.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SageCreateTagCounts.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SageCreateTagCounts.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SageCreateTagCounts.scala rename to 
biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SageCreateTagCounts.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SeqStat.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SeqStat.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SeqStat.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SeqStat.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SnptestToVcf.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SnptestToVcf.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SnptestToVcf.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SnptestToVcf.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SquishBed.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SquishBed.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SquishBed.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SquishBed.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/ValidateFastq.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/ValidateFastq.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/ValidateFastq.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/ValidateFastq.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilter.scala 
b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilter.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilter.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilter.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfStats.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfStats.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfStats.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfStats.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfWithVcf.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfWithVcf.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfWithVcf.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfWithVcf.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VepNormalizer.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VepNormalizer.scala similarity index 100% rename from public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VepNormalizer.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VepNormalizer.scala diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/WipeReads.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/WipeReads.scala similarity index 100% rename from 
public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/WipeReads.scala rename to biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/WipeReads.scala diff --git a/public/biopet-tools-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilterTest.scala b/biopet-tools-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilterTest.scala similarity index 78% rename from public/biopet-tools-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilterTest.scala rename to biopet-tools-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilterTest.scala index cc98b0c7ad39f729bae0af7ed8ff90604f82e5b3..5444ade3b60a58c4e94a312e6c4d24a156c42c84 100644 --- a/public/biopet-tools-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilterTest.scala +++ b/biopet-tools-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilterTest.scala @@ -19,7 +19,7 @@ import java.io.File import org.scalatest.Matchers import org.scalatest.testng.TestNGSuite -import org.testng.annotations.{ DataProvider, Test } +import org.testng.annotations.Test /** * Created by ahbbollen on 2-3-16. 
@@ -44,34 +44,26 @@ class VcfFilterTest extends TestNGSuite with Matchers { filterer.outputVcfIndex.getAbsolutePath shouldBe oVcf.getAbsolutePath + ".tbi" } - @DataProvider(name = "functions") - def functions = { - Array( - () => testCommand(minSampleDepth = Some(2)), - () => testCommand(minTotalDepth = Some(2)), - () => testCommand(minAlternateDepth = Some(2)), - () => testCommand(minSamplesPass = Some(2)), - () => testCommand(minGenomeQuality = Some(50)), - () => testCommand(filterRefCalls = true), - () => testCommand(invertedOutputVcf = Some(File.createTempFile("vcfFilter", ".vcf"))), - () => testCommand(resToDom = Some("dummy")), - () => testCommand(trioCompound = Some("dummy")), - () => testCommand(deNovoInSample = Some("dummy")), - () => testCommand(deNovoTrio = Some("dummy")), - () => testCommand(trioLossOfHet = Some("dummy")), - () => testCommand(mustHaveVariant = List("sample1", "sample2")), - () => testCommand(calledIn = List("sample1", "sample2")), - () => testCommand(mustHaveGenotype = List("sample1:HET", "sample2:HET")), - () => testCommand(diffGenotype = List("sample1:sample2", "sample2:sample3")), - () => testCommand(minQualScore = Some(50.0)), - () => testCommand(filterHetVarToHomVar = List("dummy")), - () => testCommand(id = List("rs01", "rs02")), - () => testCommand(idFile = Some(File.createTempFile("vcfFilter", ".txt"))) - ).map(Array(_)) - } - - @Test(dataProvider = "functions") - def executer(function0: Function0[Unit]): Unit = function0() + @Test def testMinSampleDepth() = testCommand(minSampleDepth = Some(2)) + @Test def testMinTotalDepth() = testCommand(minTotalDepth = Some(2)) + @Test def testMinAlternateDepth() = testCommand(minAlternateDepth = Some(2)) + @Test def testMinSamplesPass() = testCommand(minSamplesPass = Some(2)) + @Test def testMinGenomeQuality() = testCommand(minGenomeQuality = Some(50)) + @Test def testFilterRefCalls() = testCommand(filterRefCalls = true) + @Test def testInvertedOutputVcf() = testCommand(invertedOutputVcf = 
Some(File.createTempFile("vcfFilter", ".vcf"))) + @Test def testResToDom() = testCommand(resToDom = Some("dummy")) + @Test def testTrioCompound() = testCommand(trioCompound = Some("dummy")) + @Test def testDeNovoInSample() = testCommand(deNovoInSample = Some("dummy")) + @Test def testDeNovoTrio() = testCommand(deNovoTrio = Some("dummy")) + @Test def testTrioLossOfHet() = testCommand(trioLossOfHet = Some("dummy")) + @Test def testMustHaveVariant() = testCommand(mustHaveVariant = List("sample1", "sample2")) + @Test def testCalledIn() = testCommand(calledIn = List("sample1", "sample2")) + @Test def testMustHaveGenotype() = testCommand(mustHaveGenotype = List("sample1:HET", "sample2:HET")) + @Test def testDiffGenotype() = testCommand(diffGenotype = List("sample1:sample2", "sample2:sample3")) + @Test def testMinQualScore() = testCommand(minQualScore = Some(50.0)) + @Test def testFilterHetVarToHomVar() = testCommand(filterHetVarToHomVar = List("dummy")) + @Test def testId() = testCommand(id = List("rs01", "rs02")) + @Test def testIdFile() = testCommand(idFile = Some(File.createTempFile("vcfFilter", ".txt"))) protected def testCommand(minSampleDepth: Option[Int] = None, minTotalDepth: Option[Int] = None, diff --git a/public/biopet-tools-package/pom.xml b/biopet-tools-package/pom.xml similarity index 100% rename from public/biopet-tools-package/pom.xml rename to biopet-tools-package/pom.xml diff --git a/public/biopet-tools-package/src/main/scala/nl/lumc/sasc/biopet/BiopetToolsExecutable.scala b/biopet-tools-package/src/main/scala/nl/lumc/sasc/biopet/BiopetToolsExecutable.scala similarity index 100% rename from public/biopet-tools-package/src/main/scala/nl/lumc/sasc/biopet/BiopetToolsExecutable.scala rename to biopet-tools-package/src/main/scala/nl/lumc/sasc/biopet/BiopetToolsExecutable.scala diff --git a/public/biopet-tools/pom.xml b/biopet-tools/pom.xml similarity index 88% rename from public/biopet-tools/pom.xml rename to biopet-tools/pom.xml index 
b7e49a3b7c290129cc7bc0082e419c16d8b6aa4b..889cbbed421a7569ba206f972570e360cf6d11f7 100644 --- a/public/biopet-tools/pom.xml +++ b/biopet-tools/pom.xml @@ -29,13 +29,6 @@ <artifactId>BiopetTools</artifactId> - <repositories> - <repository> - <id>biojava-maven-repo</id> - <name>BioJava repository</name> - <url>http://www.biojava.org/download/maven/</url> - </repository> - </repositories> <dependencies> <dependency> <groupId>org.testng</groupId> @@ -67,8 +60,8 @@ </dependency> <dependency> <groupId>org.biojava</groupId> - <artifactId>biojava3-sequencing</artifactId> - <version>3.1.0</version> + <artifactId>biojava3-core</artifactId> + <version>3.0</version> </dependency> <dependency> <groupId>com.github.broadinstitute</groupId> diff --git a/public/biopet-tools/src/main/resources/log4j.properties b/biopet-tools/src/main/resources/log4j.properties similarity index 100% rename from public/biopet-tools/src/main/resources/log4j.properties rename to biopet-tools/src/main/resources/log4j.properties diff --git a/public/biopet-tools/src/main/resources/nl/lumc/sasc/biopet/tools/plotHeatmap.R b/biopet-tools/src/main/resources/nl/lumc/sasc/biopet/tools/plotHeatmap.R similarity index 100% rename from public/biopet-tools/src/main/resources/nl/lumc/sasc/biopet/tools/plotHeatmap.R rename to biopet-tools/src/main/resources/nl/lumc/sasc/biopet/tools/plotHeatmap.R diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/AnnotateVcfWithBed.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/AnnotateVcfWithBed.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/AnnotateVcfWithBed.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/AnnotateVcfWithBed.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BaseCounter.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BaseCounter.scala similarity index 100% rename from 
public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BaseCounter.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BaseCounter.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BedToInterval.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BedToInterval.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BedToInterval.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BedToInterval.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BedtoolsCoverageToCounts.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BedtoolsCoverageToCounts.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BedtoolsCoverageToCounts.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BedtoolsCoverageToCounts.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala similarity index 100% rename from 
public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GensToVcf.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GensToVcf.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GensToVcf.scala rename to 
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GensToVcf.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GvcfToBed.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GvcfToBed.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GvcfToBed.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GvcfToBed.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeAlleles.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeAlleles.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeAlleles.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeAlleles.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeOtuMaps.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeOtuMaps.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeOtuMaps.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeOtuMaps.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeTables.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeTables.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeTables.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeTables.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala 
b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/PrefixFastq.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/PrefixFastq.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/PrefixFastq.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/PrefixFastq.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/RegionAfCount.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/RegionAfCount.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/RegionAfCount.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/RegionAfCount.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCountFastq.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCountFastq.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCountFastq.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCountFastq.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateLibrary.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateLibrary.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateLibrary.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateLibrary.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCounts.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCounts.scala similarity index 100% rename from 
public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCounts.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCounts.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SnptestToVcf.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SnptestToVcf.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SnptestToVcf.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SnptestToVcf.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SummaryToTsv.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SummaryToTsv.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SummaryToTsv.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SummaryToTsv.scala diff --git 
a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ValidateFastq.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ValidateFastq.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ValidateFastq.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ValidateFastq.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfToTsv.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfToTsv.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfToTsv.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfToTsv.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala similarity index 100% rename from 
public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/WipeReads.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/WipeReads.scala similarity index 100% rename from public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/WipeReads.scala rename to biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/WipeReads.scala diff --git a/public/biopet-tools/src/test/resources/README.txt b/biopet-tools/src/test/resources/README.txt similarity index 100% rename from public/biopet-tools/src/test/resources/README.txt rename to biopet-tools/src/test/resources/README.txt diff --git a/public/biopet-tools/src/test/resources/VCFv3.vcf b/biopet-tools/src/test/resources/VCFv3.vcf similarity index 100% rename from public/biopet-tools/src/test/resources/VCFv3.vcf rename to biopet-tools/src/test/resources/VCFv3.vcf diff --git a/public/biopet-tools/src/test/resources/VEP_oneline.vcf b/biopet-tools/src/test/resources/VEP_oneline.vcf similarity index 100% rename from public/biopet-tools/src/test/resources/VEP_oneline.vcf rename to biopet-tools/src/test/resources/VEP_oneline.vcf diff --git a/public/biopet-tools/src/test/resources/VEP_oneline.vcf.gz b/biopet-tools/src/test/resources/VEP_oneline.vcf.gz similarity index 100% rename from public/biopet-tools/src/test/resources/VEP_oneline.vcf.gz rename to biopet-tools/src/test/resources/VEP_oneline.vcf.gz diff --git a/public/biopet-tools/src/test/resources/VEP_oneline.vcf.gz.tbi b/biopet-tools/src/test/resources/VEP_oneline.vcf.gz.tbi similarity index 100% rename from public/biopet-tools/src/test/resources/VEP_oneline.vcf.gz.tbi rename to biopet-tools/src/test/resources/VEP_oneline.vcf.gz.tbi diff --git a/public/biopet-tools/src/test/resources/chrQ.refflat b/biopet-tools/src/test/resources/chrQ.refflat similarity index 100% rename from 
public/biopet-tools/src/test/resources/chrQ.refflat rename to biopet-tools/src/test/resources/chrQ.refflat diff --git a/public/biopet-tools/src/test/resources/chrQ.vcf b/biopet-tools/src/test/resources/chrQ.vcf similarity index 100% rename from public/biopet-tools/src/test/resources/chrQ.vcf rename to biopet-tools/src/test/resources/chrQ.vcf diff --git a/public/biopet-tools/src/test/resources/chrQ.vcf.gz b/biopet-tools/src/test/resources/chrQ.vcf.gz similarity index 100% rename from public/biopet-tools/src/test/resources/chrQ.vcf.gz rename to biopet-tools/src/test/resources/chrQ.vcf.gz diff --git a/public/biopet-tools/src/test/resources/chrQ.vcf.gz.tbi b/biopet-tools/src/test/resources/chrQ.vcf.gz.tbi similarity index 100% rename from public/biopet-tools/src/test/resources/chrQ.vcf.gz.tbi rename to biopet-tools/src/test/resources/chrQ.vcf.gz.tbi diff --git a/public/biopet-tools/src/test/resources/chrQ2.vcf b/biopet-tools/src/test/resources/chrQ2.vcf similarity index 100% rename from public/biopet-tools/src/test/resources/chrQ2.vcf rename to biopet-tools/src/test/resources/chrQ2.vcf diff --git a/public/biopet-tools/src/test/resources/chrQ2.vcf.gz b/biopet-tools/src/test/resources/chrQ2.vcf.gz similarity index 100% rename from public/biopet-tools/src/test/resources/chrQ2.vcf.gz rename to biopet-tools/src/test/resources/chrQ2.vcf.gz diff --git a/public/biopet-tools/src/test/resources/chrQ2.vcf.gz.tbi b/biopet-tools/src/test/resources/chrQ2.vcf.gz.tbi similarity index 100% rename from public/biopet-tools/src/test/resources/chrQ2.vcf.gz.tbi rename to biopet-tools/src/test/resources/chrQ2.vcf.gz.tbi diff --git a/public/biopet-tools/src/test/resources/chrQ_allN.fa b/biopet-tools/src/test/resources/chrQ_allN.fa similarity index 100% rename from public/biopet-tools/src/test/resources/chrQ_allN.fa rename to biopet-tools/src/test/resources/chrQ_allN.fa diff --git a/public/biopet-tools/src/test/resources/chrQ_allN.fa.fai b/biopet-tools/src/test/resources/chrQ_allN.fa.fai 
similarity index 100% rename from public/biopet-tools/src/test/resources/chrQ_allN.fa.fai rename to biopet-tools/src/test/resources/chrQ_allN.fa.fai diff --git a/public/biopet-tools/src/test/resources/empty.bai b/biopet-tools/src/test/resources/empty.bai similarity index 100% rename from public/biopet-tools/src/test/resources/empty.bai rename to biopet-tools/src/test/resources/empty.bai diff --git a/public/biopet-tools/src/test/resources/empty.bam b/biopet-tools/src/test/resources/empty.bam similarity index 100% rename from public/biopet-tools/src/test/resources/empty.bam rename to biopet-tools/src/test/resources/empty.bam diff --git a/public/biopet-tools/src/test/resources/fake_chrQ.dict b/biopet-tools/src/test/resources/fake_chrQ.dict similarity index 100% rename from public/biopet-tools/src/test/resources/fake_chrQ.dict rename to biopet-tools/src/test/resources/fake_chrQ.dict diff --git a/public/biopet-tools/src/test/resources/fake_chrQ.fa b/biopet-tools/src/test/resources/fake_chrQ.fa similarity index 100% rename from public/biopet-tools/src/test/resources/fake_chrQ.fa rename to biopet-tools/src/test/resources/fake_chrQ.fa diff --git a/public/biopet-tools/src/test/resources/fake_chrQ.fa.fai b/biopet-tools/src/test/resources/fake_chrQ.fa.fai similarity index 100% rename from public/biopet-tools/src/test/resources/fake_chrQ.fa.fai rename to biopet-tools/src/test/resources/fake_chrQ.fa.fai diff --git a/public/biopet-tools/src/test/resources/flagstat_crossreport.txt b/biopet-tools/src/test/resources/flagstat_crossreport.txt similarity index 100% rename from public/biopet-tools/src/test/resources/flagstat_crossreport.txt rename to biopet-tools/src/test/resources/flagstat_crossreport.txt diff --git a/public/biopet-tools/src/test/resources/flagstat_crosstrue.txt b/biopet-tools/src/test/resources/flagstat_crosstrue.txt similarity index 100% rename from public/biopet-tools/src/test/resources/flagstat_crosstrue.txt rename to 
biopet-tools/src/test/resources/flagstat_crosstrue.txt diff --git a/public/biopet-tools/src/test/resources/flagstat_report.txt b/biopet-tools/src/test/resources/flagstat_report.txt similarity index 100% rename from public/biopet-tools/src/test/resources/flagstat_report.txt rename to biopet-tools/src/test/resources/flagstat_report.txt diff --git a/public/biopet-tools/src/test/resources/flagstat_summary.txt b/biopet-tools/src/test/resources/flagstat_summary.txt similarity index 100% rename from public/biopet-tools/src/test/resources/flagstat_summary.txt rename to biopet-tools/src/test/resources/flagstat_summary.txt diff --git a/public/biopet-tools/src/test/resources/gens.samples b/biopet-tools/src/test/resources/gens.samples similarity index 100% rename from public/biopet-tools/src/test/resources/gens.samples rename to biopet-tools/src/test/resources/gens.samples diff --git a/public/biopet-tools/src/test/resources/log4j.properties b/biopet-tools/src/test/resources/log4j.properties similarity index 100% rename from public/biopet-tools/src/test/resources/log4j.properties rename to biopet-tools/src/test/resources/log4j.properties diff --git a/public/biopet-tools/src/test/resources/mini.transcriptome.fa b/biopet-tools/src/test/resources/mini.transcriptome.fa similarity index 100% rename from public/biopet-tools/src/test/resources/mini.transcriptome.fa rename to biopet-tools/src/test/resources/mini.transcriptome.fa diff --git a/public/biopet-tools/src/test/resources/no_sample.tsv b/biopet-tools/src/test/resources/no_sample.tsv similarity index 100% rename from public/biopet-tools/src/test/resources/no_sample.tsv rename to biopet-tools/src/test/resources/no_sample.tsv diff --git a/public/biopet-tools/src/test/resources/number.tsv b/biopet-tools/src/test/resources/number.tsv similarity index 100% rename from public/biopet-tools/src/test/resources/number.tsv rename to biopet-tools/src/test/resources/number.tsv diff --git a/public/biopet-tools/src/test/resources/paired01.bam 
b/biopet-tools/src/test/resources/paired01.bam similarity index 100% rename from public/biopet-tools/src/test/resources/paired01.bam rename to biopet-tools/src/test/resources/paired01.bam diff --git a/public/biopet-tools/src/test/resources/paired01.bam.bai b/biopet-tools/src/test/resources/paired01.bam.bai similarity index 100% rename from public/biopet-tools/src/test/resources/paired01.bam.bai rename to biopet-tools/src/test/resources/paired01.bam.bai diff --git a/public/biopet-tools/src/test/resources/paired01.pileup b/biopet-tools/src/test/resources/paired01.pileup similarity index 100% rename from public/biopet-tools/src/test/resources/paired01.pileup rename to biopet-tools/src/test/resources/paired01.pileup diff --git a/public/biopet-tools/src/test/resources/paired01.sam b/biopet-tools/src/test/resources/paired01.sam similarity index 100% rename from public/biopet-tools/src/test/resources/paired01.sam rename to biopet-tools/src/test/resources/paired01.sam diff --git a/public/biopet-tools/src/test/resources/paired01a.fq b/biopet-tools/src/test/resources/paired01a.fq similarity index 100% rename from public/biopet-tools/src/test/resources/paired01a.fq rename to biopet-tools/src/test/resources/paired01a.fq diff --git a/public/biopet-tools/src/test/resources/paired01b.fq b/biopet-tools/src/test/resources/paired01b.fq similarity index 100% rename from public/biopet-tools/src/test/resources/paired01b.fq rename to biopet-tools/src/test/resources/paired01b.fq diff --git a/public/biopet-tools/src/test/resources/paired01c.fq b/biopet-tools/src/test/resources/paired01c.fq similarity index 100% rename from public/biopet-tools/src/test/resources/paired01c.fq rename to biopet-tools/src/test/resources/paired01c.fq diff --git a/public/biopet-tools/src/test/resources/paired02.bam b/biopet-tools/src/test/resources/paired02.bam similarity index 100% rename from public/biopet-tools/src/test/resources/paired02.bam rename to biopet-tools/src/test/resources/paired02.bam diff --git 
a/public/biopet-tools/src/test/resources/paired02.bam.bai b/biopet-tools/src/test/resources/paired02.bam.bai similarity index 100% rename from public/biopet-tools/src/test/resources/paired02.bam.bai rename to biopet-tools/src/test/resources/paired02.bam.bai diff --git a/public/biopet-tools/src/test/resources/paired02.sam b/biopet-tools/src/test/resources/paired02.sam similarity index 100% rename from public/biopet-tools/src/test/resources/paired02.sam rename to biopet-tools/src/test/resources/paired02.sam diff --git a/public/biopet-tools/src/test/resources/paired03.bam b/biopet-tools/src/test/resources/paired03.bam similarity index 100% rename from public/biopet-tools/src/test/resources/paired03.bam rename to biopet-tools/src/test/resources/paired03.bam diff --git a/public/biopet-tools/src/test/resources/paired03.bam.bai b/biopet-tools/src/test/resources/paired03.bam.bai similarity index 100% rename from public/biopet-tools/src/test/resources/paired03.bam.bai rename to biopet-tools/src/test/resources/paired03.bam.bai diff --git a/public/biopet-tools/src/test/resources/paired03.sam b/biopet-tools/src/test/resources/paired03.sam similarity index 100% rename from public/biopet-tools/src/test/resources/paired03.sam rename to biopet-tools/src/test/resources/paired03.sam diff --git a/public/biopet-tools/src/test/resources/rrna01.bed b/biopet-tools/src/test/resources/rrna01.bed similarity index 100% rename from public/biopet-tools/src/test/resources/rrna01.bed rename to biopet-tools/src/test/resources/rrna01.bed diff --git a/public/biopet-tools/src/test/resources/rrna01.gtf b/biopet-tools/src/test/resources/rrna01.gtf similarity index 100% rename from public/biopet-tools/src/test/resources/rrna01.gtf rename to biopet-tools/src/test/resources/rrna01.gtf diff --git a/public/biopet-tools/src/test/resources/rrna01.refFlat b/biopet-tools/src/test/resources/rrna01.refFlat similarity index 100% rename from public/biopet-tools/src/test/resources/rrna01.refFlat rename to 
biopet-tools/src/test/resources/rrna01.refFlat diff --git a/public/biopet-tools/src/test/resources/rrna02.bed b/biopet-tools/src/test/resources/rrna02.bed similarity index 100% rename from public/biopet-tools/src/test/resources/rrna02.bed rename to biopet-tools/src/test/resources/rrna02.bed diff --git a/public/biopet-tools/src/test/resources/sageAllGenesTest.tsv b/biopet-tools/src/test/resources/sageAllGenesTest.tsv similarity index 100% rename from public/biopet-tools/src/test/resources/sageAllGenesTest.tsv rename to biopet-tools/src/test/resources/sageAllGenesTest.tsv diff --git a/public/biopet-tools/src/test/resources/sageNoAntiTest.tsv b/biopet-tools/src/test/resources/sageNoAntiTest.tsv similarity index 100% rename from public/biopet-tools/src/test/resources/sageNoAntiTest.tsv rename to biopet-tools/src/test/resources/sageNoAntiTest.tsv diff --git a/public/biopet-tools/src/test/resources/sageNoTagsTest.tsv b/biopet-tools/src/test/resources/sageNoTagsTest.tsv similarity index 100% rename from public/biopet-tools/src/test/resources/sageNoTagsTest.tsv rename to biopet-tools/src/test/resources/sageNoTagsTest.tsv diff --git a/public/biopet-tools/src/test/resources/sageTest.tsv b/biopet-tools/src/test/resources/sageTest.tsv similarity index 100% rename from public/biopet-tools/src/test/resources/sageTest.tsv rename to biopet-tools/src/test/resources/sageTest.tsv diff --git a/public/biopet-tools/src/test/resources/same.tsv b/biopet-tools/src/test/resources/same.tsv similarity index 100% rename from public/biopet-tools/src/test/resources/same.tsv rename to biopet-tools/src/test/resources/same.tsv diff --git a/public/biopet-tools/src/test/resources/sample.tsv b/biopet-tools/src/test/resources/sample.tsv similarity index 100% rename from public/biopet-tools/src/test/resources/sample.tsv rename to biopet-tools/src/test/resources/sample.tsv diff --git a/public/biopet-tools/src/test/resources/single01.bam b/biopet-tools/src/test/resources/single01.bam similarity index 100% 
rename from public/biopet-tools/src/test/resources/single01.bam rename to biopet-tools/src/test/resources/single01.bam diff --git a/public/biopet-tools/src/test/resources/single01.bam.bai b/biopet-tools/src/test/resources/single01.bam.bai similarity index 100% rename from public/biopet-tools/src/test/resources/single01.bam.bai rename to biopet-tools/src/test/resources/single01.bam.bai diff --git a/public/biopet-tools/src/test/resources/single01.fq b/biopet-tools/src/test/resources/single01.fq similarity index 100% rename from public/biopet-tools/src/test/resources/single01.fq rename to biopet-tools/src/test/resources/single01.fq diff --git a/public/biopet-tools/src/test/resources/single01.sam b/biopet-tools/src/test/resources/single01.sam similarity index 100% rename from public/biopet-tools/src/test/resources/single01.sam rename to biopet-tools/src/test/resources/single01.sam diff --git a/public/biopet-tools/src/test/resources/single02.bam b/biopet-tools/src/test/resources/single02.bam similarity index 100% rename from public/biopet-tools/src/test/resources/single02.bam rename to biopet-tools/src/test/resources/single02.bam diff --git a/public/biopet-tools/src/test/resources/single02.bam.bai b/biopet-tools/src/test/resources/single02.bam.bai similarity index 100% rename from public/biopet-tools/src/test/resources/single02.bam.bai rename to biopet-tools/src/test/resources/single02.bam.bai diff --git a/public/biopet-tools/src/test/resources/single02.sam b/biopet-tools/src/test/resources/single02.sam similarity index 100% rename from public/biopet-tools/src/test/resources/single02.sam rename to biopet-tools/src/test/resources/single02.sam diff --git a/public/biopet-tools/src/test/resources/single03.bam b/biopet-tools/src/test/resources/single03.bam similarity index 100% rename from public/biopet-tools/src/test/resources/single03.bam rename to biopet-tools/src/test/resources/single03.bam diff --git a/public/biopet-tools/src/test/resources/single03.bam.bai 
b/biopet-tools/src/test/resources/single03.bam.bai similarity index 100% rename from public/biopet-tools/src/test/resources/single03.bam.bai rename to biopet-tools/src/test/resources/single03.bam.bai diff --git a/public/biopet-tools/src/test/resources/single03.sam b/biopet-tools/src/test/resources/single03.sam similarity index 100% rename from public/biopet-tools/src/test/resources/single03.sam rename to biopet-tools/src/test/resources/single03.sam diff --git a/public/biopet-tools/src/test/resources/single04.bam b/biopet-tools/src/test/resources/single04.bam similarity index 100% rename from public/biopet-tools/src/test/resources/single04.bam rename to biopet-tools/src/test/resources/single04.bam diff --git a/public/biopet-tools/src/test/resources/single04.bam.bai b/biopet-tools/src/test/resources/single04.bam.bai similarity index 100% rename from public/biopet-tools/src/test/resources/single04.bam.bai rename to biopet-tools/src/test/resources/single04.bam.bai diff --git a/public/biopet-tools/src/test/resources/single04.sam b/biopet-tools/src/test/resources/single04.sam similarity index 100% rename from public/biopet-tools/src/test/resources/single04.sam rename to biopet-tools/src/test/resources/single04.sam diff --git a/public/biopet-tools/src/test/resources/single05.bam b/biopet-tools/src/test/resources/single05.bam similarity index 100% rename from public/biopet-tools/src/test/resources/single05.bam rename to biopet-tools/src/test/resources/single05.bam diff --git a/public/biopet-tools/src/test/resources/single05.bam.bai b/biopet-tools/src/test/resources/single05.bam.bai similarity index 100% rename from public/biopet-tools/src/test/resources/single05.bam.bai rename to biopet-tools/src/test/resources/single05.bam.bai diff --git a/public/biopet-tools/src/test/resources/single05.sam b/biopet-tools/src/test/resources/single05.sam similarity index 100% rename from public/biopet-tools/src/test/resources/single05.sam rename to 
biopet-tools/src/test/resources/single05.sam diff --git a/public/biopet-tools/src/test/resources/tagCount.tsv b/biopet-tools/src/test/resources/tagCount.tsv similarity index 100% rename from public/biopet-tools/src/test/resources/tagCount.tsv rename to biopet-tools/src/test/resources/tagCount.tsv diff --git a/public/biopet-tools/src/test/resources/test.empty.snptest b/biopet-tools/src/test/resources/test.empty.snptest similarity index 100% rename from public/biopet-tools/src/test/resources/test.empty.snptest rename to biopet-tools/src/test/resources/test.empty.snptest diff --git a/public/biopet-tools/src/test/resources/test.gens b/biopet-tools/src/test/resources/test.gens similarity index 100% rename from public/biopet-tools/src/test/resources/test.gens rename to biopet-tools/src/test/resources/test.gens diff --git a/public/biopet-tools/src/test/resources/test.gens_info b/biopet-tools/src/test/resources/test.gens_info similarity index 100% rename from public/biopet-tools/src/test/resources/test.gens_info rename to biopet-tools/src/test/resources/test.gens_info diff --git a/public/biopet-tools/src/test/resources/test.snptest b/biopet-tools/src/test/resources/test.snptest similarity index 100% rename from public/biopet-tools/src/test/resources/test.snptest rename to biopet-tools/src/test/resources/test.snptest diff --git a/public/biopet-tools/src/test/resources/test.summary.json b/biopet-tools/src/test/resources/test.summary.json similarity index 100% rename from public/biopet-tools/src/test/resources/test.summary.json rename to biopet-tools/src/test/resources/test.summary.json diff --git a/public/biopet-tools/src/test/resources/unvep_online.vcf b/biopet-tools/src/test/resources/unvep_online.vcf similarity index 100% rename from public/biopet-tools/src/test/resources/unvep_online.vcf rename to biopet-tools/src/test/resources/unvep_online.vcf diff --git a/public/biopet-tools/src/test/resources/unvep_online.vcf.gz b/biopet-tools/src/test/resources/unvep_online.vcf.gz 
similarity index 100% rename from public/biopet-tools/src/test/resources/unvep_online.vcf.gz rename to biopet-tools/src/test/resources/unvep_online.vcf.gz diff --git a/public/biopet-tools/src/test/resources/unvep_online.vcf.gz.tbi b/biopet-tools/src/test/resources/unvep_online.vcf.gz.tbi similarity index 100% rename from public/biopet-tools/src/test/resources/unvep_online.vcf.gz.tbi rename to biopet-tools/src/test/resources/unvep_online.vcf.gz.tbi diff --git a/public/biopet-tools/src/test/resources/unvepped.vcf b/biopet-tools/src/test/resources/unvepped.vcf similarity index 100% rename from public/biopet-tools/src/test/resources/unvepped.vcf rename to biopet-tools/src/test/resources/unvepped.vcf diff --git a/public/biopet-tools/src/test/resources/unvepped.vcf.gz b/biopet-tools/src/test/resources/unvepped.vcf.gz similarity index 100% rename from public/biopet-tools/src/test/resources/unvepped.vcf.gz rename to biopet-tools/src/test/resources/unvepped.vcf.gz diff --git a/public/biopet-tools/src/test/resources/unvepped.vcf.gz.tbi b/biopet-tools/src/test/resources/unvepped.vcf.gz.tbi similarity index 100% rename from public/biopet-tools/src/test/resources/unvepped.vcf.gz.tbi rename to biopet-tools/src/test/resources/unvepped.vcf.gz.tbi diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/AnnotateVcfWithBedTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/AnnotateVcfWithBedTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/AnnotateVcfWithBedTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/AnnotateVcfWithBedTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BaseCounterTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BaseCounterTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BaseCounterTest.scala rename to 
biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BaseCounterTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFastaTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFastaTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFastaTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFastaTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstatTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstatTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstatTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstatTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBamTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBamTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBamTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBamTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSplitterTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSplitterTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSplitterTest.scala rename to 
biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSplitterTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSyncTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSyncTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSyncTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSyncTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBioTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBioTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBioTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBioTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/GensToVcfTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/GensToVcfTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/GensToVcfTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/GensToVcfTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/GvcfToBedTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/GvcfToBedTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/GvcfToBedTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/GvcfToBedTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/MergeAllelesTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/MergeAllelesTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/MergeAllelesTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/MergeAllelesTest.scala diff --git 
a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/MergeTablesTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/MergeTablesTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/MergeTablesTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/MergeTablesTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/MpileupToVcfTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/MpileupToVcfTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/MpileupToVcfTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/MpileupToVcfTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/PrefixFastqTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/PrefixFastqTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/PrefixFastqTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/PrefixFastqTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCountFastqTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCountFastqTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCountFastqTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCountFastqTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateLibaryTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateLibaryTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateLibaryTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateLibaryTest.scala diff --git 
a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCountsTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCountsTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCountsTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCountsTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJsonTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJsonTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJsonTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJsonTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SeqStatTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SeqStatTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SeqStatTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SeqStatTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SnptestToVcfTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SnptestToVcfTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SnptestToVcfTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SnptestToVcfTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SummaryToTsvTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SummaryToTsvTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SummaryToTsvTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SummaryToTsvTest.scala diff --git 
a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/ValidateFastqTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/ValidateFastqTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/ValidateFastqTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/ValidateFastqTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfStatsTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfStatsTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfStatsTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfStatsTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfToTsvTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfToTsvTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfToTsvTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfToTsvTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfWithVcfTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfWithVcfTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfWithVcfTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfWithVcfTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VepNormalizerTest.scala 
b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VepNormalizerTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VepNormalizerTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VepNormalizerTest.scala diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/WipeReadsTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/WipeReadsTest.scala similarity index 100% rename from public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/WipeReadsTest.scala rename to biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/WipeReadsTest.scala diff --git a/public/biopet-utils/pom.xml b/biopet-utils/pom.xml similarity index 100% rename from public/biopet-utils/pom.xml rename to biopet-utils/pom.xml diff --git a/public/biopet-utils/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/plotScatter.R b/biopet-utils/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/plotScatter.R similarity index 100% rename from public/biopet-utils/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/plotScatter.R rename to biopet-utils/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/plotScatter.R diff --git a/public/biopet-utils/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/plotXY.R b/biopet-utils/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/plotXY.R similarity index 100% rename from public/biopet-utils/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/plotXY.R rename to biopet-utils/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/plotXY.R diff --git a/public/biopet-utils/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/stackedBar.R b/biopet-utils/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/stackedBar.R similarity index 100% rename from public/biopet-utils/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/stackedBar.R rename to biopet-utils/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/stackedBar.R diff --git 
a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/package.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/package.scala similarity index 100% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/package.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/package.scala diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala similarity index 100% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BiopetExecutable.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BiopetExecutable.scala similarity index 100% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BiopetExecutable.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BiopetExecutable.scala diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala similarity index 97% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala index 01f5f37c34f02cddefa335dbdb1257ac1f16b3bf..9dd5c884c9f392bd30c7179c15abbda871a7233b 100644 --- a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala +++ b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala @@ -430,6 +430,18 @@ object ConfigUtils extends Logging { else Nil } + /** Convert ConfigValue to List[Double] */ + implicit def configValue2doubleList(value: ConfigValue): List[Double] = { + if (requiredValue(value)) any2list(value.value).map(any2double(_)) + else Nil + } + + /** Convert ConfigValue to List[Int] */ + implicit def 
configValue2intList(value: ConfigValue): List[Int] = { + if (requiredValue(value)) any2list(value.value).map(any2int(_)) + else Nil + } + /** Convert ConfigValue to Set[String] */ implicit def configValue2stringSet(value: ConfigValue): Set[String] = { if (requiredValue(value)) any2stringSet(value.value) diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/IoUtils.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/IoUtils.scala similarity index 100% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/IoUtils.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/IoUtils.scala diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/LazyCheck.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/LazyCheck.scala similarity index 100% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/LazyCheck.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/LazyCheck.scala diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/Logging.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/Logging.scala similarity index 100% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/Logging.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/Logging.scala diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/MainCommand.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/MainCommand.scala similarity index 100% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/MainCommand.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/MainCommand.scala diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/ToolCommand.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/ToolCommand.scala similarity index 100% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/ToolCommand.scala rename to 
biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/ToolCommand.scala diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala similarity index 93% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala index e724575ac08dd288e9ff5ae08237bdb0cb208657..5b100606f0dc00bcd637c667871cb5c2d8da99cd 100644 --- a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala +++ b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala @@ -127,4 +127,11 @@ object VcfUtils { def hasMinGenomeQuality(gt: Genotype, minGQ: Int): Boolean = { gt.hasGQ && gt.getGQ >= minGQ } + + def getVcfIndexFile(vcfFile: File): File = { + val name = vcfFile.getAbsolutePath + if (name.endsWith(".vcf")) new File(name + ".idx") + else if (name.endsWith(".vcf.gz")) new File(name + ".tbi") + else throw new IllegalArgumentException(s"File given is no vcf file: $vcfFile") + } } diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/Config.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/Config.scala similarity index 100% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/Config.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/Config.scala diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/ConfigValue.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/ConfigValue.scala similarity index 100% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/ConfigValue.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/ConfigValue.scala diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/ConfigValueIndex.scala 
b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/ConfigValueIndex.scala similarity index 100% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/ConfigValueIndex.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/ConfigValueIndex.scala diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/Configurable.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/Configurable.scala similarity index 100% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/Configurable.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/Configurable.scala diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala similarity index 100% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala similarity index 100% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/package.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/package.scala similarity index 100% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/package.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/package.scala diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/LinePlot.scala 
b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/LinePlot.scala similarity index 100% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/LinePlot.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/LinePlot.scala diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/Rscript.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/Rscript.scala similarity index 100% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/Rscript.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/Rscript.scala diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/ScatterPlot.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/ScatterPlot.scala similarity index 100% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/ScatterPlot.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/ScatterPlot.scala diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/StackedBarPlot.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/StackedBarPlot.scala similarity index 100% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/StackedBarPlot.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/StackedBarPlot.scala diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/Summary.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/Summary.scala similarity index 100% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/Summary.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/Summary.scala diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValue.scala 
b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValue.scala similarity index 100% rename from public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValue.scala rename to biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValue.scala diff --git a/public/biopet-utils/src/test/resources/log4j.properties b/biopet-utils/src/test/resources/log4j.properties similarity index 100% rename from public/biopet-utils/src/test/resources/log4j.properties rename to biopet-utils/src/test/resources/log4j.properties diff --git a/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/BamUtilsTest.scala b/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/BamUtilsTest.scala similarity index 100% rename from public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/BamUtilsTest.scala rename to biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/BamUtilsTest.scala diff --git a/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/ConfigUtilsTest.scala b/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/ConfigUtilsTest.scala similarity index 100% rename from public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/ConfigUtilsTest.scala rename to biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/ConfigUtilsTest.scala diff --git a/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/PackageTest.scala b/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/PackageTest.scala similarity index 100% rename from public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/PackageTest.scala rename to biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/PackageTest.scala diff --git a/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/config/ConfigTest.scala b/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/config/ConfigTest.scala similarity index 100% rename from public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/config/ConfigTest.scala rename to 
biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/config/ConfigTest.scala diff --git a/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/config/ConfigValueTest.scala b/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/config/ConfigValueTest.scala similarity index 100% rename from public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/config/ConfigValueTest.scala rename to biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/config/ConfigValueTest.scala diff --git a/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/config/ConfigurableTest.scala b/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/config/ConfigurableTest.scala similarity index 100% rename from public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/config/ConfigurableTest.scala rename to biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/config/ConfigurableTest.scala diff --git a/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordListTest.scala b/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordListTest.scala similarity index 100% rename from public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordListTest.scala rename to biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordListTest.scala diff --git a/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordTest.scala b/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordTest.scala similarity index 100% rename from public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordTest.scala rename to biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordTest.scala diff --git a/public/bammetrics/.gitignore b/carp/.gitignore similarity index 100% rename from public/bammetrics/.gitignore rename to carp/.gitignore diff --git a/public/carp/pom.xml b/carp/pom.xml similarity index 100% rename from public/carp/pom.xml rename to carp/pom.xml 
diff --git a/public/carp/src/main/resources/nl/lumc/sasc/biopet/pipelines/carp/carpFront.ssp b/carp/src/main/resources/nl/lumc/sasc/biopet/pipelines/carp/carpFront.ssp similarity index 100% rename from public/carp/src/main/resources/nl/lumc/sasc/biopet/pipelines/carp/carpFront.ssp rename to carp/src/main/resources/nl/lumc/sasc/biopet/pipelines/carp/carpFront.ssp diff --git a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala b/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala similarity index 100% rename from public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala rename to carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala diff --git a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpReport.scala b/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpReport.scala similarity index 100% rename from public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpReport.scala rename to carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpReport.scala diff --git a/public/carp/src/test/resources/log4j.properties b/carp/src/test/resources/log4j.properties similarity index 100% rename from public/carp/src/test/resources/log4j.properties rename to carp/src/test/resources/log4j.properties diff --git a/public/bammetrics/src/test/resources/ref.dict b/carp/src/test/resources/ref.dict similarity index 100% rename from public/bammetrics/src/test/resources/ref.dict rename to carp/src/test/resources/ref.dict diff --git a/public/bammetrics/src/test/resources/ref.fa b/carp/src/test/resources/ref.fa similarity index 100% rename from public/bammetrics/src/test/resources/ref.fa rename to carp/src/test/resources/ref.fa diff --git a/public/bammetrics/src/test/resources/ref.fa.fai b/carp/src/test/resources/ref.fa.fai similarity index 100% rename from public/bammetrics/src/test/resources/ref.fa.fai rename to carp/src/test/resources/ref.fa.fai diff --git 
a/public/carp/src/test/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpTest.scala b/carp/src/test/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpTest.scala similarity index 100% rename from public/carp/src/test/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpTest.scala rename to carp/src/test/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpTest.scala diff --git a/docs/pipelines/toucan.md b/docs/pipelines/toucan.md index 12f4ea108c54402f51d37e6f673d97e633058d6f..5f362bf13fa083d571f57e03f95351fc1231a16b 100644 --- a/docs/pipelines/toucan.md +++ b/docs/pipelines/toucan.md @@ -83,6 +83,25 @@ The following config values are optional: Annotation queries can be set by the `annotation_queries` config value in the `manwe` config namespace. By default, a global query is returned. + +### Groups +In case you want to add your samples to a specific group in your varda database, you can use the tagging system in your sample config. +Specifically, the `varda_group` tag should be a list of strings, each naming a group. + +E.g. 
: + +```json +{ + "samples": { + "sample1": { + "tags": { + "varda_group": ["group1", "group2"] + } + } + } +} +``` + Running the pipeline --------------- The command to run the pipeline is: diff --git a/public/basty/.gitignore b/flexiprep/.gitignore similarity index 100% rename from public/basty/.gitignore rename to flexiprep/.gitignore diff --git a/public/flexiprep/pom.xml b/flexiprep/pom.xml similarity index 100% rename from public/flexiprep/pom.xml rename to flexiprep/pom.xml diff --git a/public/flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp b/flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp similarity index 100% rename from public/flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp rename to flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp diff --git a/public/flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepFastQcPlot.ssp b/flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepFastQcPlot.ssp similarity index 100% rename from public/flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepFastQcPlot.ssp rename to flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepFastQcPlot.ssp diff --git a/public/flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepFront.ssp b/flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepFront.ssp similarity index 100% rename from public/flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepFront.ssp rename to flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepFront.ssp diff --git a/public/flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepInputfiles.ssp 
b/flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepInputfiles.ssp similarity index 100% rename from public/flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepInputfiles.ssp rename to flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepInputfiles.ssp diff --git a/public/flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp b/flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp similarity index 100% rename from public/flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp rename to flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp diff --git a/public/flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp b/flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp similarity index 100% rename from public/flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp rename to flexiprep/src/main/resources/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/CheckValidateFastq.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/CheckValidateFastq.scala similarity index 100% rename from public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/CheckValidateFastq.scala rename to flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/CheckValidateFastq.scala diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala similarity index 80% rename from public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala rename to 
flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala index fc8db7ab30f7581c7638f15c48bba6e9443eb195..3cb06df0e160cb97b98710de74f7ca9fa31ce919 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala @@ -16,6 +16,7 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep import nl.lumc.sasc.biopet.utils.config.Configurable +import scala.collection.JavaConversions._ /** * Cutadapt wrapper specific for Flexiprep. @@ -41,23 +42,26 @@ class Cutadapt(root: Configurable, fastqc: Fastqc) extends nl.lumc.sasc.biopet.e val adapterCounts: Map[String, Any] = initStats.get(adaptersStatsName) match { // "adapters" key found in statistics case Some(m: Map[_, _]) => m.flatMap { - case (seq: String, count) => - seqToNameMap.get(seq) match { + case (adapterSequence: String, adapterStats: Map[_, _]) => + seqToNameMap.get(adapterSequence) match { // adapter sequence is found by FastQC - case Some(n) => Some(n -> Map("sequence" -> seq, "count" -> count)) + case Some(adapterSeqName) => { + Some(adapterSeqName -> + Map("sequence" -> adapterSequence, "stats" -> adapterStats.toMap) + ) + } // adapter sequence is clipped but not found by FastQC ~ should not happen since all clipped adapter // sequences come from FastQC case _ => - throw new IllegalStateException(s"Adapter '$seq' is clipped but not found by FastQC in '$fastqInput'.") + throw new IllegalStateException(s"Adapter '$adapterSequence' is clipped but not found by FastQC in '$fastqInput'.") } // FastQC found no adapters case otherwise => - ; logger.debug(s"No adapters found for summarizing in '$fastqInput'.") None } // "adapters" key not found ~ something went wrong in our part - case _ => throw new RuntimeException(s"Required key 'adapters' not found in stats entry '$fastqInput'.") + case _ => throw new RuntimeException(s"Required key '${adaptersStatsName}' not found in stats entry 
'${fastqInput}'.") } initStats.updated(adaptersStatsName, adapterCounts) } diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala similarity index 98% rename from public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala rename to flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala index 1bbc7b3f520b549933ec650b7eed847c97113692..bfcbf1d543a37d2e17d2897a1989183d2dfcfa58 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala @@ -22,8 +22,8 @@ import nl.lumc.sasc.biopet.core.summary.Summarizable import nl.lumc.sasc.biopet.utils.config.Configurable import scala.io.Source - import htsjdk.samtools.util.SequenceUtil.reverseComplement +import org.broadinstitute.gatk.utils.commandline.Output /** * FastQC wrapper with added functionality for the Flexiprep pipeline @@ -216,6 +216,9 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r } else Set() } + @Output + private var outputFiles: List[File] = Nil + def summaryFiles: Map[String, File] = { val outputFiles = Map("plot_duplication_levels" -> ("Images" + File.separator + "duplication_levels.png"), "plot_kmer_profiles" -> ("Images" + File.separator + "kmer_profiles.png"), diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala similarity index 100% rename from public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala rename to flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepReport.scala 
b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepReport.scala similarity index 100% rename from public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepReport.scala rename to flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepReport.scala diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala similarity index 99% rename from public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala rename to flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala index 5d92bf92b5aa8fe4b3c98a2dbc71efa76c5a2588..c343be2297474025f70224024d827c1f21cfab78 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala @@ -77,6 +77,9 @@ class QcCommand(val root: Configurable, val fastqc: Fastqc) extends BiopetComman } } + @Output + private var outputFiles: List[File] = Nil + override def beforeGraph(): Unit = { super.beforeGraph() require(read != null) diff --git a/flexiprep/src/test/resources/ct-test.R1.clip.stats b/flexiprep/src/test/resources/ct-test.R1.clip.stats new file mode 100644 index 0000000000000000000000000000000000000000..4a280ef0a7d2588169c02b5e40432f4f903c69b8 --- /dev/null +++ b/flexiprep/src/test/resources/ct-test.R1.clip.stats @@ -0,0 +1,160 @@ +This is cutadapt 1.9.1 with Python 2.7.6 +Command line parameters: -b CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC -b GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTCCGCATCTCGTATGCCGTCTTCTGCTTG -b CAAGCAGAAGACGGCATACGAGATGCGGACGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC -b GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG --error-rate 0.2 --times 2 -m 15 ct_r1.fq.gz.seqtk.fq --output ct_r1.fq.gz.cutadapt.fq +Trimming 4 adapters with at most 
20.0% errors in single-end mode ... +Finished in 0.19 s (189 us/read; 0.32 M reads/minute). + +=== Summary === + +Total reads processed: 1,000 +Reads with adapters: 440 (44.0%) +Reads that were too short: 15 (1.5%) +Reads written (passing filters): 985 (98.5%) + +Total basepairs processed: 100,000 bp +Total written (filtered): 89,423 bp (89.4%) + +=== Adapter 1 === + +Sequence: CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC; Type: variable 5'/3'; Length: 63; Trimmed: 94 times. +18 times, it overlapped the 5' end of a read +76 times, it overlapped the 3' end or was within the read + +No. of allowed errors: +0-4 bp: 0; 5-9 bp: 1; 10-14 bp: 2; 15-19 bp: 3; 20-24 bp: 4; 25-29 bp: 5; 30-34 bp: 6; 35-39 bp: 7; 40-44 bp: 8; 45-49 bp: 9; 50-54 bp: 10; 55-59 bp: 11; 60-63 bp: 12 + +Overview of removed sequences (5') +length count expect max.err error counts +3 8 15.6 0 8 +4 3 3.9 0 2 1 +5 2 1.0 1 0 2 +6 4 0.2 1 1 3 +9 1 0.0 1 0 0 1 + + +Overview of removed sequences (3' or within) +length count expect max.err error counts +3 13 15.6 0 13 +4 19 3.9 0 3 16 +5 21 1.0 1 0 21 +6 18 0.2 1 1 17 +7 2 0.1 1 0 2 +9 1 0.0 1 0 0 1 +11 1 0.0 2 0 0 1 +12 1 0.0 2 0 0 1 + +=== Adapter 2 === + +Sequence: GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTCCGCATCTCGTATGCCGTCTTCTGCTTG; Type: variable 5'/3'; Length: 63; Trimmed: 340 times. +117 times, it overlapped the 5' end of a read +223 times, it overlapped the 3' end or was within the read + +No. 
of allowed errors: +0-4 bp: 0; 5-9 bp: 1; 10-14 bp: 2; 15-19 bp: 3; 20-24 bp: 4; 25-29 bp: 5; 30-34 bp: 6; 35-39 bp: 7; 40-44 bp: 8; 45-49 bp: 9; 50-54 bp: 10; 55-59 bp: 11; 60-63 bp: 12 + +Overview of removed sequences (5') +length count expect max.err error counts +3 14 15.6 0 14 +4 29 3.9 0 6 23 +5 32 1.0 1 3 29 +6 36 0.2 1 0 36 +8 1 0.0 1 0 1 +9 1 0.0 1 0 0 1 +10 1 0.0 2 0 0 1 +11 2 0.0 2 0 0 2 +37 1 0.0 7 0 0 0 0 0 1 + + +Overview of removed sequences (3' or within) +length count expect max.err error counts +3 18 15.6 0 18 +4 9 3.9 0 5 4 +5 15 1.0 1 8 7 +6 10 0.2 1 8 2 +7 7 0.1 1 5 2 +8 10 0.0 1 9 1 +9 6 0.0 1 5 1 +10 8 0.0 2 5 0 3 +11 4 0.0 2 4 +12 4 0.0 2 4 +13 9 0.0 2 9 +14 4 0.0 2 3 0 1 +15 7 0.0 3 7 +16 2 0.0 3 2 +17 4 0.0 3 2 1 0 1 +18 2 0.0 3 2 +19 2 0.0 3 2 +20 2 0.0 4 0 1 1 +21 7 0.0 4 6 1 +22 7 0.0 4 7 +23 2 0.0 4 2 +24 3 0.0 4 3 +25 5 0.0 5 5 +26 5 0.0 5 5 +27 8 0.0 5 8 +28 6 0.0 5 5 1 +29 2 0.0 5 2 +30 5 0.0 6 5 +31 3 0.0 6 3 +32 8 0.0 6 8 +33 1 0.0 6 1 +34 5 0.0 6 0 5 +35 2 0.0 7 0 0 0 0 0 0 2 +36 3 0.0 7 0 0 0 0 0 0 3 +37 4 0.0 7 0 0 0 0 0 0 0 2 2 +38 2 0.0 7 0 0 0 0 0 0 0 0 0 2 +39 4 0.0 7 0 0 0 0 1 0 0 0 0 3 +40 3 0.0 8 0 0 0 0 0 0 0 3 +41 1 0.0 8 0 0 0 0 0 0 0 1 +42 4 0.0 8 0 0 0 0 0 0 0 0 4 +43 5 0.0 8 0 0 0 0 0 0 0 0 0 5 +44 3 0.0 8 0 0 0 0 0 0 0 0 0 0 3 +46 1 0.0 9 0 0 0 0 0 0 0 0 0 0 1 +49 1 0.0 9 0 0 0 0 0 1 + +=== Adapter 3 === + +Sequence: CAAGCAGAAGACGGCATACGAGATGCGGACGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC; Type: variable 5'/3'; Length: 63; Trimmed: 0 times. + +=== Adapter 4 === + +Sequence: GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG; Type: variable 5'/3'; Length: 63; Trimmed: 82 times. +15 times, it overlapped the 5' end of a read +67 times, it overlapped the 3' end or was within the read + +No. 
of allowed errors: +0-4 bp: 0; 5-9 bp: 1; 10-14 bp: 2; 15-19 bp: 3; 20-24 bp: 4; 25-29 bp: 5; 30-34 bp: 6; 35-39 bp: 7; 40-44 bp: 8; 45-49 bp: 9; 50-54 bp: 10; 55-59 bp: 11; 60-63 bp: 12 + +Overview of removed sequences (5') +length count expect max.err error counts +26 1 0.0 5 0 1 +61 2 0.0 12 0 0 0 2 +64 11 0.0 12 0 0 0 11 +72 1 0.0 12 0 0 0 0 0 0 0 0 0 0 0 1 + + +Overview of removed sequences (3' or within) +length count expect max.err error counts +45 3 0.0 9 0 0 0 3 +46 2 0.0 9 0 0 0 2 +47 3 0.0 9 0 0 0 3 +48 3 0.0 9 0 0 0 3 +49 2 0.0 9 0 0 0 2 +50 3 0.0 10 0 0 0 3 +51 2 0.0 10 0 0 0 2 +52 6 0.0 10 0 0 0 6 +53 1 0.0 10 0 0 0 1 +54 5 0.0 10 0 0 0 4 0 1 +56 2 0.0 11 0 0 0 2 +57 2 0.0 11 0 0 0 2 +58 2 0.0 11 0 0 0 2 +59 3 0.0 11 0 0 0 2 0 0 0 0 0 1 +61 1 0.0 12 0 0 0 0 0 1 +62 3 0.0 12 0 0 0 2 1 +63 1 0.0 12 0 0 0 0 1 +66 3 0.0 12 0 0 0 3 +67 3 0.0 12 0 0 0 3 +70 1 0.0 12 0 0 0 1 +72 1 0.0 12 0 0 0 1 +80 1 0.0 12 0 0 0 1 +99 14 0.0 12 0 0 0 14 + diff --git a/public/flexiprep/src/test/resources/fqc_contaminants_v0101.txt b/flexiprep/src/test/resources/fqc_contaminants_v0101.txt similarity index 100% rename from public/flexiprep/src/test/resources/fqc_contaminants_v0101.txt rename to flexiprep/src/test/resources/fqc_contaminants_v0101.txt diff --git a/flexiprep/src/test/resources/fqc_contaminants_v0112.txt b/flexiprep/src/test/resources/fqc_contaminants_v0112.txt new file mode 100644 index 0000000000000000000000000000000000000000..d2c29bee8171e0454994c6d7d6f0f4780efb3921 --- /dev/null +++ b/flexiprep/src/test/resources/fqc_contaminants_v0112.txt @@ -0,0 +1,182 @@ +# This file contains a list of potential contaminants which are +# frequently found in high throughput sequencing reactions. These +# are mostly sequences of adapters / primers used in the various +# sequencing chemistries. 
+# +# Please DO NOT rely on these sequences to design your own oligos, some +# of them are truncated at ambiguous positions, and none of them are +# definitive sequences from the manufacturers so don't blame us if you +# try to use them and they don't work. +# +# You can add more sequences to the file by putting one line per entry +# and specifying a name[tab]sequence. If the contaminant you add is +# likely to be of use to others please consider sending it to the FastQ +# authors, either via a bug report at www.bioinformatics.babraham.ac.uk/bugzilla/ +# or by directly emailing simon.andrews@babraham.ac.uk so other users of +# the program can benefit. + +Illumina Single End Adapter 1 GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG +Illumina Single End Adapter 2 CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT +Illumina Single End PCR Primer 1 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT +Illumina Single End PCR Primer 2 CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT +Illumina Single End Sequencing Primer ACACTCTTTCCCTACACGACGCTCTTCCGATCT + +Illumina Paired End Adapter 1 ACACTCTTTCCCTACACGACGCTCTTCCGATCT +Illumina Paired End Adapter 2 GATCGGAAGAGCGGTTCAGCAGGAATGCCGAG +Illumina Paried End PCR Primer 1 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT +Illumina Paired End PCR Primer 2 CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT +Illumina Paried End Sequencing Primer 1 ACACTCTTTCCCTACACGACGCTCTTCCGATCT +Illumina Paired End Sequencing Primer 2 CGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT + +Illumina DpnII expression Adapter 1 ACAGGTTCAGAGTTCTACAGTCCGAC +Illumina DpnII expression Adapter 2 CAAGCAGAAGACGGCATACGA +Illumina DpnII expression PCR Primer 1 CAAGCAGAAGACGGCATACGA +Illumina DpnII expression PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA +Illumina DpnII expression Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC + +Illumina NlaIII expression Adapter 1 ACAGGTTCAGAGTTCTACAGTCCGACATG +Illumina NlaIII expression Adapter 2 CAAGCAGAAGACGGCATACGA +Illumina NlaIII 
expression PCR Primer 1 CAAGCAGAAGACGGCATACGA +Illumina NlaIII expression PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA +Illumina NlaIII expression Sequencing Primer CCGACAGGTTCAGAGTTCTACAGTCCGACATG + +Illumina Small RNA Adapter 1 GTTCAGAGTTCTACAGTCCGACGATC +Illumina Small RNA Adapter 2 TGGAATTCTCGGGTGCCAAGG +Illumina Small RNA RT Primer CAAGCAGAAGACGGCATACGA +Illumina Small RNA PCR Primer 1 CAAGCAGAAGACGGCATACGA +Illumina Small RNA PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA +Illumina Small RNA Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC + +Illumina Multiplexing Adapter 1 GATCGGAAGAGCACACGTCT +Illumina Multiplexing Adapter 2 ACACTCTTTCCCTACACGACGCTCTTCCGATCT +Illumina Multiplexing PCR Primer 1.01 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT +Illumina Multiplexing PCR Primer 2.01 GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT +Illumina Multiplexing Read1 Sequencing Primer ACACTCTTTCCCTACACGACGCTCTTCCGATCT +Illumina Multiplexing Index Sequencing Primer GATCGGAAGAGCACACGTCTGAACTCCAGTCAC +Illumina Multiplexing Read2 Sequencing Primer GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT + +Illumina PCR Primer Index 1 CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTC +Illumina PCR Primer Index 2 CAAGCAGAAGACGGCATACGAGATACATCGGTGACTGGAGTTC +Illumina PCR Primer Index 3 CAAGCAGAAGACGGCATACGAGATGCCTAAGTGACTGGAGTTC +Illumina PCR Primer Index 4 CAAGCAGAAGACGGCATACGAGATTGGTCAGTGACTGGAGTTC +Illumina PCR Primer Index 5 CAAGCAGAAGACGGCATACGAGATCACTGTGTGACTGGAGTTC +Illumina PCR Primer Index 6 CAAGCAGAAGACGGCATACGAGATATTGGCGTGACTGGAGTTC +Illumina PCR Primer Index 7 CAAGCAGAAGACGGCATACGAGATGATCTGGTGACTGGAGTTC +Illumina PCR Primer Index 8 CAAGCAGAAGACGGCATACGAGATTCAAGTGTGACTGGAGTTC +Illumina PCR Primer Index 9 CAAGCAGAAGACGGCATACGAGATCTGATCGTGACTGGAGTTC +Illumina PCR Primer Index 10 CAAGCAGAAGACGGCATACGAGATAAGCTAGTGACTGGAGTTC +Illumina PCR Primer Index 11 CAAGCAGAAGACGGCATACGAGATGTAGCCGTGACTGGAGTTC +Illumina PCR Primer Index 12 CAAGCAGAAGACGGCATACGAGATTACAAGGTGACTGGAGTTC + 
+Illumina DpnII Gex Adapter 1 GATCGTCGGACTGTAGAACTCTGAAC +Illumina DpnII Gex Adapter 1.01 ACAGGTTCAGAGTTCTACAGTCCGAC +Illumina DpnII Gex Adapter 2 CAAGCAGAAGACGGCATACGA +Illumina DpnII Gex Adapter 2.01 TCGTATGCCGTCTTCTGCTTG +Illumina DpnII Gex PCR Primer 1 CAAGCAGAAGACGGCATACGA +Illumina DpnII Gex PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA +Illumina DpnII Gex Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC + +Illumina NlaIII Gex Adapter 1.01 TCGGACTGTAGAACTCTGAAC +Illumina NlaIII Gex Adapter 1.02 ACAGGTTCAGAGTTCTACAGTCCGACATG +Illumina NlaIII Gex Adapter 2.01 CAAGCAGAAGACGGCATACGA +Illumina NlaIII Gex Adapter 2.02 TCGTATGCCGTCTTCTGCTTG +Illumina NlaIII Gex PCR Primer 1 CAAGCAGAAGACGGCATACGA +Illumina NlaIII Gex PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA +Illumina NlaIII Gex Sequencing Primer CCGACAGGTTCAGAGTTCTACAGTCCGACATG + +Illumina Small RNA RT Primer CAAGCAGAAGACGGCATACGA +Illumina 5p RNA Adapter GTTCAGAGTTCTACAGTCCGACGATC +Illumina RNA Adapter1 TGGAATTCTCGGGTGCCAAGG + +Illumina Small RNA 3p Adapter 1 ATCTCGTATGCCGTCTTCTGCTTG +Illumina Small RNA PCR Primer 1 CAAGCAGAAGACGGCATACGA +Illumina Small RNA PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA +Illumina Small RNA Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC + +TruSeq Universal Adapter AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT +TruSeq Adapter, Index 1 GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 2 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGATGTATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 3 GATCGGAAGAGCACACGTCTGAACTCCAGTCACTTAGGCATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 4 GATCGGAAGAGCACACGTCTGAACTCCAGTCACTGACCAATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 5 GATCGGAAGAGCACACGTCTGAACTCCAGTCACACAGTGATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 6 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGCCAATATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 7 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCAGATCATCTCGTATGCCGTCTTCTGCTTG 
+TruSeq Adapter, Index 8 GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTTGAATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 9 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGATCAGATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 10 GATCGGAAGAGCACACGTCTGAACTCCAGTCACTAGCTTATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 11 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGGCTACATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 12 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCTTGTAATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 13 GATCGGAAGAGCACACGTCTGAACTCCAGTCACAGTCAACTCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 14 GATCGGAAGAGCACACGTCTGAACTCCAGTCACAGTTCCGTCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 15 GATCGGAAGAGCACACGTCTGAACTCCAGTCACATGTCAGTCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 16 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCCGTCCCTCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 18 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTCCGCATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 19 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTGAAACTCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 20 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTGGCCTTCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 21 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTTTCGGTCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 22 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGTACGTTCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 23 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCCACTCTTCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 25 GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTGATATCTCGTATGCCGTCTTCTGCTTG +TruSeq Adapter, Index 27 GATCGGAAGAGCACACGTCTGAACTCCAGTCACATTCCTTTCTCGTATGCCGTCTTCTGCTTG + +Illumina RNA RT Primer GCCTTGGCACCCGAGAATTCCA +Illumina RNA PCR Primer AATGATACGGCGACCACCGAGATCTACACGTTCAGAGTTCTACAGTCCGA + +RNA PCR Primer, Index 1 CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 2 CAAGCAGAAGACGGCATACGAGATACATCGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 3 CAAGCAGAAGACGGCATACGAGATGCCTAAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 4 
CAAGCAGAAGACGGCATACGAGATTGGTCAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 5 CAAGCAGAAGACGGCATACGAGATCACTGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 6 CAAGCAGAAGACGGCATACGAGATATTGGCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 7 CAAGCAGAAGACGGCATACGAGATGATCTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 8 CAAGCAGAAGACGGCATACGAGATTCAAGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 9 CAAGCAGAAGACGGCATACGAGATCTGATCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 10 CAAGCAGAAGACGGCATACGAGATAAGCTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 11 CAAGCAGAAGACGGCATACGAGATGTAGCCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 12 CAAGCAGAAGACGGCATACGAGATTACAAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 13 CAAGCAGAAGACGGCATACGAGATTTGACTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 14 CAAGCAGAAGACGGCATACGAGATGGAACTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 15 CAAGCAGAAGACGGCATACGAGATTGACATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 16 CAAGCAGAAGACGGCATACGAGATGGACGGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 17 CAAGCAGAAGACGGCATACGAGATCTCTACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 18 CAAGCAGAAGACGGCATACGAGATGCGGACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 19 CAAGCAGAAGACGGCATACGAGATTTTCACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 20 CAAGCAGAAGACGGCATACGAGATGGCCACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 21 CAAGCAGAAGACGGCATACGAGATCGAAACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 22 CAAGCAGAAGACGGCATACGAGATCGTACGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 23 CAAGCAGAAGACGGCATACGAGATCCACTCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 24 CAAGCAGAAGACGGCATACGAGATGCTACCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 25 CAAGCAGAAGACGGCATACGAGATATCAGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 26 
CAAGCAGAAGACGGCATACGAGATGCTCATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 27 CAAGCAGAAGACGGCATACGAGATAGGAATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 28 CAAGCAGAAGACGGCATACGAGATCTTTTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 29 CAAGCAGAAGACGGCATACGAGATTAGTTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 30 CAAGCAGAAGACGGCATACGAGATCCGGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 31 CAAGCAGAAGACGGCATACGAGATATCGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 32 CAAGCAGAAGACGGCATACGAGATTGAGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 33 CAAGCAGAAGACGGCATACGAGATCGCCTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 34 CAAGCAGAAGACGGCATACGAGATGCCATGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 35 CAAGCAGAAGACGGCATACGAGATAAAATGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 36 CAAGCAGAAGACGGCATACGAGATTGTTGGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 37 CAAGCAGAAGACGGCATACGAGATATTCCGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 38 CAAGCAGAAGACGGCATACGAGATAGCTAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 39 CAAGCAGAAGACGGCATACGAGATGTATAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 40 CAAGCAGAAGACGGCATACGAGATTCTGAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 41 CAAGCAGAAGACGGCATACGAGATGTCGTCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 42 CAAGCAGAAGACGGCATACGAGATCGATTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 43 CAAGCAGAAGACGGCATACGAGATGCTGTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 44 CAAGCAGAAGACGGCATACGAGATATTATAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 45 CAAGCAGAAGACGGCATACGAGATGAATGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 46 CAAGCAGAAGACGGCATACGAGATTCGGGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 47 CAAGCAGAAGACGGCATACGAGATCTTCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA +RNA PCR Primer, Index 48 
CAAGCAGAAGACGGCATACGAGATTGCCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA + +ABI Dynabead EcoP Oligo CTGATCTAGAGGTACCGGATCCCAGCAGT +ABI Solid3 Adapter A CTGCCCCGGGTTCCTCATTCTCTCAGCAGCATG +ABI Solid3 Adapter B CCACTACGCCTCCGCTTTCCTCTCTATGGGCAGTCGGTGAT +ABI Solid3 5' AMP Primer CCACTACGCCTCCGCTTTCCTCTCTATG +ABI Solid3 3' AMP Primer CTGCCCCGGGTTCCTCATTCT +ABI Solid3 EF1 alpha Sense Primer CATGTGTGTTGAGAGCTTC +ABI Solid3 EF1 alpha Antisense Primer GAAAACCAAAGTGGTCCAC +ABI Solid3 GAPDH Forward Primer TTAGCACCCCTGGCCAAGG +ABI Solid3 GAPDH Reverse Primer CTTACTCCTTGGAGGCCATG diff --git a/public/flexiprep/src/test/resources/log4j.properties b/flexiprep/src/test/resources/log4j.properties similarity index 100% rename from public/flexiprep/src/test/resources/log4j.properties rename to flexiprep/src/test/resources/log4j.properties diff --git a/flexiprep/src/test/resources/v0101.fq_fastqc/fastqc_data.txt b/flexiprep/src/test/resources/v0101.fq_fastqc/fastqc_data.txt new file mode 100644 index 0000000000000000000000000000000000000000..02b9e3f0cbf01c6ce54fa715df93d7cfc6ba4bab --- /dev/null +++ b/flexiprep/src/test/resources/v0101.fq_fastqc/fastqc_data.txt @@ -0,0 +1,466 @@ +##FastQC 0.11.2 +>>Basic Statistics pass +#Measure Value +Filename ct_r1.fq.gz +File type Conventional base calls +Encoding Sanger / Illumina 1.9 +Total Sequences 1000 +Sequences flagged as poor quality 0 +Sequence length 100 +%GC 53 +>>END_MODULE +>>Per base sequence quality fail +#Base Mean Median Lower Quartile Upper Quartile 10th Percentile 90th Percentile +1 32.244 33.0 31.0 34.0 30.0 34.0 +2 32.589 34.0 31.0 34.0 31.0 34.0 +3 32.814 34.0 31.0 34.0 31.0 34.0 +4 36.231 37.0 35.0 37.0 35.0 37.0 +5 35.907 37.0 35.0 37.0 35.0 37.0 +6 35.934 37.0 35.0 37.0 35.0 37.0 +7 35.783 37.0 35.0 37.0 35.0 37.0 +8 36.008 37.0 35.0 37.0 35.0 37.0 +9 37.706 39.0 37.0 39.0 35.0 39.0 +10-11 37.709 39.0 37.0 39.0 35.0 39.0 +12-13 37.6135 39.0 37.0 39.0 35.0 39.0 +14-15 38.793 40.0 38.0 41.0 34.5 41.0 +16-17 39.033500000000004 40.5 
38.0 41.0 35.0 41.0 +18-19 38.942 40.0 38.0 41.0 35.0 41.0 +20-21 38.888 40.0 38.0 41.0 35.0 41.0 +22-23 38.807 40.0 38.0 41.0 35.0 41.0 +24-25 38.702 40.0 38.0 41.0 34.0 41.0 +26-27 38.65 40.0 38.0 41.0 34.5 41.0 +28-29 38.4885 40.0 38.0 41.0 34.5 41.0 +30-31 38.307 40.0 38.0 41.0 34.0 41.0 +32-33 38.433499999999995 40.0 38.0 41.0 34.0 41.0 +34-35 38.3425 40.0 38.0 41.0 33.5 41.0 +36-37 38.1185 40.0 37.5 41.0 33.5 41.0 +38-39 38.088499999999996 40.0 37.0 41.0 33.5 41.0 +40-41 37.555 40.0 36.0 41.0 32.5 41.0 +42-43 37.504999999999995 40.0 36.0 41.0 33.0 41.0 +44-45 37.167 39.0 35.5 41.0 32.0 41.0 +46-47 36.980999999999995 39.0 35.0 41.0 31.0 41.0 +48-49 36.8635 39.0 35.0 40.5 31.0 41.0 +50-51 36.4125 38.5 35.0 40.0 30.5 41.0 +52-53 35.528000000000006 37.5 34.5 40.0 28.5 41.0 +54-55 34.925 37.0 33.5 40.0 27.5 41.0 +56-57 34.8735 37.0 34.0 40.0 27.5 41.0 +58-59 34.7225 36.0 33.5 40.0 28.0 41.0 +60-61 34.67400000000001 36.0 34.0 39.0 28.5 40.5 +62-63 33.841499999999996 35.0 33.0 38.5 26.5 40.0 +64-65 33.549 35.0 33.0 38.0 26.0 40.0 +66-67 32.971999999999994 35.0 33.0 37.0 26.0 40.0 +68-69 32.1635 35.0 32.0 37.0 22.5 39.0 +70-71 30.002000000000002 34.0 30.5 36.0 2.0 39.0 +72-73 29.0695 34.0 29.0 35.5 2.0 38.5 +74-75 28.641 34.0 29.0 35.0 2.0 38.0 +76-77 27.8495 33.0 27.5 35.0 2.0 36.0 +78-79 26.5345 33.0 24.0 35.0 2.0 36.5 +80-81 26.140500000000003 33.0 23.0 35.0 2.0 36.0 +82-83 25.784 33.0 21.5 35.0 2.0 35.0 +84-85 25.6115 33.0 20.0 35.0 2.0 35.0 +86-87 25.1755 33.0 17.0 35.0 2.0 35.0 +88-89 24.600499999999997 31.5 13.5 34.0 2.0 35.0 +90-91 24.088 31.5 6.5 34.0 2.0 35.0 +92-93 24.16 32.0 8.5 34.0 2.0 35.0 +94-95 23.02 30.0 2.0 34.0 2.0 35.0 +96-97 23.183 30.5 2.0 34.0 2.0 35.0 +98-99 22.75 30.5 2.0 34.0 2.0 35.0 +100 21.984 30.0 2.0 34.0 2.0 35.0 +>>END_MODULE +>>Per tile sequence quality pass +#Tile Base Mean +1101 1 0.0 +1101 2 0.0 +1101 3 0.0 +1101 4 0.0 +1101 5 0.0 +1101 6 0.0 +1101 7 0.0 +1101 8 0.0 +1101 9 0.0 +1101 10-11 0.0 +1101 12-13 0.0 +1101 14-15 0.0 
+1101 16-17 0.0 +1101 18-19 0.0 +1101 20-21 0.0 +1101 22-23 0.0 +1101 24-25 0.0 +1101 26-27 0.0 +1101 28-29 0.0 +1101 30-31 0.0 +1101 32-33 0.0 +1101 34-35 0.0 +1101 36-37 0.0 +1101 38-39 0.0 +1101 40-41 0.0 +1101 42-43 0.0 +1101 44-45 0.0 +1101 46-47 0.0 +1101 48-49 0.0 +1101 50-51 0.0 +1101 52-53 0.0 +1101 54-55 0.0 +1101 56-57 0.0 +1101 58-59 0.0 +1101 60-61 0.0 +1101 62-63 0.0 +1101 64-65 0.0 +1101 66-67 0.0 +1101 68-69 0.0 +1101 70-71 0.0 +1101 72-73 0.0 +1101 74-75 0.0 +1101 76-77 0.0 +1101 78-79 0.0 +1101 80-81 0.0 +1101 82-83 0.0 +1101 84-85 0.0 +1101 86-87 0.0 +1101 88-89 0.0 +1101 90-91 0.0 +1101 92-93 0.0 +1101 94-95 0.0 +1101 96-97 0.0 +1101 98-99 0.0 +1101 100 0.0 +>>END_MODULE +>>Per sequence quality scores pass +#Quality Count +11 1.0 +12 4.0 +13 3.0 +14 1.0 +15 4.0 +16 4.0 +17 6.0 +18 7.0 +19 4.0 +20 2.0 +21 7.0 +22 9.0 +23 9.0 +24 17.0 +25 23.0 +26 30.0 +27 52.0 +28 39.0 +29 28.0 +30 23.0 +31 33.0 +32 43.0 +33 47.0 +34 74.0 +35 88.0 +36 148.0 +37 202.0 +38 89.0 +39 3.0 +>>END_MODULE +>>Per base sequence content fail +#Base G A T C +1 52.35707121364093 17.251755265797392 11.735205616850552 18.655967903711137 +2 34.300000000000004 11.1 24.8 29.799999999999997 +3 41.0 6.5 20.200000000000003 32.300000000000004 +4 37.5 8.7 26.0 27.800000000000004 +5 35.4 12.4 31.8 20.4 +6 57.3 11.1 1.6 30.0 +7 20.9 24.7 32.6 21.8 +8 20.0 27.200000000000003 30.0 22.8 +9 24.5 21.5 27.800000000000004 26.200000000000003 +10-11 25.15 24.0 27.55 23.3 +12-13 26.200000000000003 22.3 24.65 26.85 +14-15 24.75 21.95 26.3 27.0 +16-17 25.4 21.7 26.55 26.35 +18-19 27.650000000000002 21.6 25.85 24.9 +20-21 24.8 21.8 24.3 29.099999999999998 +22-23 25.900000000000002 23.05 24.15 26.900000000000002 +24-25 24.85 21.4 25.900000000000002 27.85 +26-27 24.7 20.849999999999998 25.0 29.45 +28-29 24.4 23.3 24.95 27.35 +30-31 27.35 20.95 25.15 26.55 +32-33 24.9 22.05 23.400000000000002 29.65 +34-35 25.6 22.15 25.900000000000002 26.35 +36-37 24.95 21.2 23.400000000000002 30.45 +38-39 24.8 23.35 
23.7 28.15 +40-41 27.0 23.35 23.599999999999998 26.05 +42-43 25.15 22.35 23.799999999999997 28.7 +44-45 26.200000000000003 20.7 24.3 28.799999999999997 +46-47 26.3 24.0 23.150000000000002 26.55 +48-49 25.5 23.3 24.05 27.150000000000002 +50-51 27.55 22.75 23.7 26.0 +52-53 24.45 23.400000000000002 23.1 29.049999999999997 +54-55 27.450000000000003 21.85 23.0 27.700000000000003 +56-57 25.85 22.15 23.5 28.499999999999996 +58-59 24.05 22.75 25.6 27.6 +60-61 25.25 20.95 23.45 30.349999999999998 +62-63 27.3 21.9 23.7 27.1 +64-65 26.178535606820464 24.57372116349047 22.617853560682047 26.629889669007024 +66-67 25.7 23.75 22.05 28.499999999999996 +68-69 25.405679513184587 23.52941176470588 24.036511156186613 27.028397565922923 +70-71 25.159574468085104 23.085106382978722 23.138297872340424 28.617021276595743 +72-73 26.031065881092662 26.513122656668454 20.51419389394751 26.941617568291377 +74-75 25.197680548234054 26.56826568265683 21.929362150764366 26.304691618344755 +76-77 25.911812738160044 23.51660315732172 24.550898203592812 26.02068590092542 +78-79 26.16345062429058 22.985244040862657 21.793416572077184 29.05788876276958 +80-81 26.98324022346369 25.474860335195533 21.005586592178773 26.536312849162012 +82-83 26.46370023419204 24.355971896955502 22.131147540983605 27.049180327868854 +84-85 26.124567474048444 23.18339100346021 22.145328719723185 28.546712802768166 +86-87 25.976331360946748 25.443786982248522 22.36686390532544 26.21301775147929 +88-89 25.503742084052966 23.54634427173287 23.316062176165804 27.63385146804836 +90-91 23.832052040212893 21.525724423418097 25.901833234772326 28.74039030159669 +92-93 24.525139664804467 22.849162011173185 23.743016759776538 28.88268156424581 +94-95 25.161987041036717 24.028077753779698 22.4622030237581 28.347732181425485 +96-97 25.37393162393162 24.412393162393162 23.664529914529915 26.549145299145298 +98-99 25.67703109327984 23.620862587763288 22.71815446339017 27.9839518555667 +100 24.0 26.0 21.9 28.1 +>>END_MODULE +>>Per 
sequence GC content fail +#GC Content Count +0 0.0 +1 0.0 +2 0.0 +3 0.0 +4 0.0 +5 0.0 +6 0.0 +7 0.0 +8 0.0 +9 0.0 +10 0.0 +11 0.0 +12 0.0 +13 0.0 +14 0.0 +15 0.0 +16 0.0 +17 0.0 +18 0.0 +19 0.0 +20 0.0 +21 0.0 +22 0.0 +23 0.5 +24 0.5 +25 0.5 +26 1.0 +27 1.5 +28 2.0 +29 3.5 +30 5.5 +31 6.0 +32 6.5 +33 6.0 +34 4.5 +35 6.0 +36 11.0 +37 17.0 +38 21.0 +39 16.5 +40 15.0 +41 24.0 +42 28.5 +43 33.0 +44 35.5 +45 32.5 +46 32.0 +47 32.0 +48 29.5 +49 30.5 +50 30.0 +51 29.5 +52 30.0 +53 27.5 +54 26.5 +55 27.0 +56 29.5 +57 34.0 +58 36.0 +59 36.0 +60 37.0 +61 31.5 +62 24.0 +63 22.5 +64 27.0 +65 28.5 +66 20.5 +67 15.0 +68 17.0 +69 13.5 +70 8.0 +71 7.0 +72 9.0 +73 8.0 +74 5.5 +75 4.5 +76 2.0 +77 2.0 +78 3.0 +79 2.0 +80 1.5 +81 1.0 +82 0.0 +83 0.5 +84 1.0 +85 0.5 +86 0.0 +87 0.0 +88 0.0 +89 0.0 +90 0.0 +91 0.0 +92 0.0 +93 0.0 +94 0.0 +95 0.0 +96 0.0 +97 0.0 +98 0.0 +99 0.0 +100 0.0 +>>END_MODULE +>>Per base N content warn +#Base N-Count +1 0.3 +2 0.0 +3 0.0 +4 0.0 +5 0.0 +6 0.0 +7 0.0 +8 0.0 +9 0.0 +10-11 0.0 +12-13 0.0 +14-15 0.0 +16-17 0.0 +18-19 0.0 +20-21 0.0 +22-23 0.0 +24-25 0.0 +26-27 0.0 +28-29 0.0 +30-31 0.0 +32-33 0.0 +34-35 0.0 +36-37 0.0 +38-39 0.0 +40-41 0.0 +42-43 0.0 +44-45 0.0 +46-47 0.0 +48-49 0.0 +50-51 0.0 +52-53 0.0 +54-55 0.0 +56-57 0.0 +58-59 0.0 +60-61 0.0 +62-63 0.0 +64-65 0.3 +66-67 0.0 +68-69 1.4000000000000001 +70-71 6.0 +72-73 6.65 +74-75 5.1499999999999995 +76-77 8.15 +78-79 11.899999999999999 +80-81 10.5 +82-83 14.6 +84-85 13.3 +86-87 15.5 +88-89 13.15 +90-91 15.45 +92-93 10.5 +94-95 7.3999999999999995 +96-97 6.4 +98-99 0.3 +100 0.0 +>>END_MODULE +>>Sequence Length Distribution pass +#Length Count +100 1000.0 +>>END_MODULE +>>Sequence Duplication Levels pass +#Total Deduplicated Percentage 97.2 +#Duplication Level Percentage of deduplicated Percentage of total +1 99.38271604938271 96.6 +2 0.411522633744856 0.8 +3 0.0 0.0 +4 0.0 0.0 +5 0.0 0.0 +6 0.0 0.0 +7 0.0 0.0 +8 0.0 0.0 +9 0.0 0.0 +>10 0.205761316872428 2.6 +>50 0.0 0.0 +>100 0.0 0.0 +>500 0.0 0.0 
+>1k 0.0 0.0 +>5k 0.0 0.0 +>10k+ 0.0 0.0 +>>END_MODULE +>>Overrepresented sequences fail +#Sequence Count Percentage Possible Source +AGATCGGAAGAGCACACGTCTGAACTCCAGTCACTTCCAAGATCTCGTAT 14 1.4000000000000001 TruSeq Adapter, Index 18 (97% over 37bp) +GATCGGAAGAGCACACGTCTGAACTCCAGTCACTTCCAAGATCTCGTATG 12 1.2 TruSeq Adapter, Index 18 (97% over 37bp) +AGGGGGAATGATGGTTGTCTTTGGATATACTACAGCGATGGCTATTGAGG 2 0.2 No Hit +GGCTTGTTTTATTTTAATGGCTGATCTATGTAATCACAGAGGCCAGTATG 2 0.2 No Hit +GTGGGGTGGTGTTTGTGGGGGACTTCATCATCTCAGGCTTCCCAGGGTCC 2 0.2 No Hit +CGGAAGAGCACACGTCTGAACTCCAGTCACTTCCAAGATCTCGTATGCCG 2 0.2 TruSeq Adapter, Index 18 (97% over 34bp) +>>END_MODULE +>>Adapter Content fail +#Position Illumina Universal Adapter Illumina Small RNA Adapter Nextera Transposase Sequence +1 1.4 0.0 0.0 +2 1.4 0.0 0.0 +3 1.4 0.0 0.0 +4 1.4 0.0 0.0 +5 1.4 0.0 0.0 +6 1.4 0.0 0.0 +7 1.4 0.0 0.0 +8 1.4 0.0 0.0 +9 1.4 0.0 0.0 +10-11 1.4 0.0 0.0 +12-13 1.4 0.0 0.0 +14-15 1.4 0.0 0.0 +16-17 1.4 0.0 0.0 +18-19 1.4 0.0 0.0 +20-21 1.5 0.0 0.0 +22-23 1.5 0.0 0.0 +24-25 1.5 0.0 0.0 +26-27 1.5 0.0 0.0 +28-29 1.6 0.0 0.0 +30-31 1.7 0.0 0.0 +32-33 1.9 0.0 0.0 +34-35 2.4 0.0 0.0 +36-37 2.45 0.0 0.0 +38-39 2.95 0.0 0.0 +40-41 3.25 0.0 0.0 +42-43 3.75 0.0 0.0 +44-45 4.2 0.0 0.0 +46-47 4.9 0.0 0.0 +48-49 5.699999999999999 0.0 0.0 +50-51 6.300000000000001 0.0 0.0 +52-53 6.949999999999999 0.0 0.0 +54-55 7.65 0.0 0.0 +56-57 8.399999999999999 0.0 0.0 +58-59 9.350000000000001 0.0 0.0 +60-61 9.899999999999999 0.0 0.0 +62-63 10.600000000000001 0.0 0.0 +64-65 11.3 0.0 0.0 +66-67 12.0 0.0 0.0 +68-69 13.05 0.0 0.0 +70-71 13.6 0.0 0.0 +72-73 14.5 0.0 0.0 +74-75 15.55 0.0 0.0 +76-77 16.15 0.0 0.0 +78-79 17.2 0.0 0.0 +80-81 17.700000000000003 0.0 0.0 +82-83 18.15 0.0 0.0 +84-85 18.75 0.0 0.0 +86-87 19.799999999999997 0.0 0.0 +88 20.6 0.0 0.0 +>>END_MODULE +>>Kmer Content pass +>>END_MODULE diff --git a/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/CutadaptTest.scala 
b/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/CutadaptTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..2b537d9767cbc1ddbd9f2e528a1c122dfe973d7c --- /dev/null +++ b/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/CutadaptTest.scala @@ -0,0 +1,84 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. 
+ */ +package nl.lumc.sasc.biopet.pipelines.flexiprep + +import java.io.File + +import org.testng.annotations.Test + +class CutadaptTest extends FastqcV0101Test { + /** Mock output file of a Cutadapt 1.9 run */ + private[flexiprep] val cutadaptOut: File = resourceFile("ct-test.R1.clip.stats") + + def testFastQCinstance: Fastqc = { + val fqc = new Fastqc(null) + fqc.output = outputv0101 + fqc.contaminants = Option(resourceFile("fqc_contaminants_v0112.txt")) + // fqc.beforeGraph() + fqc + } + + def testCutadaptInst: Cutadapt = { + val caExe = new Cutadapt(null, testFastQCinstance) + caExe.statsOutput = cutadaptOut + caExe + } + + @Test def testAdapterFound() = { + val cutadapt = testCutadaptInst + val adapters = cutadapt.extractClippedAdapters(cutadaptOut) + adapters.keys.size shouldBe 4 + + adapters.get("CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC") shouldBe Some( + Map( + "count" -> 94, + "histogram" -> Map( + "5p" -> Map(5 -> 2, 6 -> 4, 9 -> 1, 3 -> 8, 4 -> 3), + "3p" -> Map(5 -> 21, 6 -> 18, 9 -> 1, 12 -> 1, 7 -> 2, 3 -> 13, 11 -> 1, 4 -> 19) + ) + ) + ) + + adapters.get("CAAGCAGAAGACGGCATACGAGATGCGGACGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC") shouldBe Some( + Map( + "count" -> 0, + "histogram" -> Map() + ) + ) + } + + @Test def testSummary() = { + val cutadapt = testCutadaptInst + val summary = cutadapt.summaryStats + + summary.keys shouldBe Set("num_bases_input", "num_reads_input", "num_reads_output", + "num_reads_with_adapters", "num_reads_affected", "num_reads_discarded_too_long", + "adapters", "num_reads_discarded_many_n", "num_reads_discarded_too_short", "num_bases_output") + + summary.keys.size shouldBe 10 + summary("adapters").asInstanceOf[Map[String, Map[String, Any]]].keys.size shouldBe 4 + + summary("num_bases_input") shouldBe 100000 + summary("num_reads_input") shouldBe 1000 + summary("num_reads_output") shouldBe 985 + summary("num_reads_with_adapters") shouldBe 440 + summary("num_reads_affected") shouldBe 425 + 
summary("num_reads_discarded_too_long") shouldBe 0 + summary("num_reads_discarded_many_n") shouldBe 0 + summary("num_reads_discarded_too_short") shouldBe 15 + summary("num_bases_output") shouldBe 89423 + } +} diff --git a/public/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala b/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala similarity index 77% rename from public/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala rename to flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala index 4cb68fdfc44d5a30c3ed76aabc9570d6f62529f3..3cf24e8c60a570e8e51fe528ece4f81d0b66a01a 100644 --- a/public/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala +++ b/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala @@ -25,14 +25,14 @@ import org.testng.annotations.Test class FastqcV0101Test extends TestNGSuite with Matchers { /** Returns the absolute path to test resource directory as a File object */ - private val resourceDir: File = new File(Paths.get(getClass.getResource("/").toURI).toString) + private[flexiprep] val resourceDir: File = new File(Paths.get(getClass.getResource("/").toURI).toString) /** Given a resource file name, returns the the absolute path to it as a File object */ - private def resourceFile(p: String): File = new File(resourceDir, p) + private[flexiprep] def resourceFile(p: String): File = new File(resourceDir, p) /** Mock output file of a FastQC v0.10.1 run */ // the file doesn't actually exist, we just need it so the outputDir value can be computed correctly - private val outputv0101: File = resourceFile("v0101.fq_fastqc.zip") + private[flexiprep] val outputv0101: File = resourceFile("v0101.fq_fastqc.zip") @Test def testOutputDir() = { val fqc = new Fastqc(null) @@ -44,7 +44,7 @@ class FastqcV0101Test extends TestNGSuite with Matchers { val fqc = new 
Fastqc(null) fqc.output = outputv0101 // 11 QC modules - fqc.qcModules.size shouldBe 11 + fqc.qcModules.size shouldBe 12 // first module fqc.qcModules.keySet should contain("Basic Statistics") // mid (6th module) @@ -83,4 +83,23 @@ class FastqcV0101Test extends TestNGSuite with Matchers { adapters.last.seq shouldEqual "GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG" } + + @Test def testPerBaseSequenceQuality() = { + val fqc = new Fastqc(null) + fqc.output = outputv0101 + + val perBaseSequenceQuality = fqc.perBaseSequenceQuality + perBaseSequenceQuality.size shouldBe 55 + perBaseSequenceQuality.keys should contain("54-55") + } + + @Test def testPerBaseSequenceContent() = { + val fqc = new Fastqc(null) + fqc.output = outputv0101 + + val perBaseSequenceContent: Map[String, Map[String, Double]] = fqc.perBaseSequenceContent + perBaseSequenceContent.size shouldBe 55 + perBaseSequenceContent.keys should contain("1") + } + } \ No newline at end of file diff --git a/public/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepTest.scala b/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepTest.scala similarity index 100% rename from public/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepTest.scala rename to flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepTest.scala diff --git a/public/gears/.gitignore b/gears/.gitignore similarity index 100% rename from public/gears/.gitignore rename to gears/.gitignore diff --git a/public/gears/pom.xml b/gears/pom.xml similarity index 100% rename from public/gears/pom.xml rename to gears/pom.xml diff --git a/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsFront.ssp b/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsFront.ssp similarity index 100% rename from public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsFront.ssp rename to 
gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsFront.ssp diff --git a/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsSingleFront.ssp b/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsSingleFront.ssp similarity index 100% rename from public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsSingleFront.ssp rename to gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsSingleFront.ssp diff --git a/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsSunburst.ssp b/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsSunburst.ssp similarity index 100% rename from public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsSunburst.ssp rename to gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsSunburst.ssp diff --git a/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp b/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp similarity index 100% rename from public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp rename to gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp diff --git a/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/qiimeKrona.ssp b/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/qiimeKrona.ssp similarity index 100% rename from public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/qiimeKrona.ssp rename to gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/qiimeKrona.ssp diff --git a/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/img/krona/favicon.ico b/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/img/krona/favicon.ico similarity index 100% rename from public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/img/krona/favicon.ico rename to 
gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/img/krona/favicon.ico diff --git a/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/img/krona/hidden.png b/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/img/krona/hidden.png similarity index 100% rename from public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/img/krona/hidden.png rename to gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/img/krona/hidden.png diff --git a/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/img/krona/loading.gif b/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/img/krona/loading.gif similarity index 100% rename from public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/img/krona/loading.gif rename to gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/img/krona/loading.gif diff --git a/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/js/gears.js b/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/js/gears.js similarity index 100% rename from public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/js/gears.js rename to gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/js/gears.js diff --git a/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/js/krona-2.0.js b/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/js/krona-2.0.js similarity index 100% rename from public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/js/krona-2.0.js rename to gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/js/krona-2.0.js diff --git a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/CombineReads.scala b/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/CombineReads.scala 
similarity index 100% rename from public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/CombineReads.scala rename to gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/CombineReads.scala diff --git a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/ExtractUnmappedReads.scala b/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/ExtractUnmappedReads.scala similarity index 100% rename from public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/ExtractUnmappedReads.scala rename to gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/ExtractUnmappedReads.scala diff --git a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala b/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala similarity index 100% rename from public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala rename to gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala diff --git a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsKraken.scala b/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsKraken.scala similarity index 100% rename from public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsKraken.scala rename to gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsKraken.scala diff --git a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsQiimeClosed.scala b/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsQiimeClosed.scala similarity index 100% rename from public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsQiimeClosed.scala rename to gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsQiimeClosed.scala diff --git a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsQiimeRtax.scala b/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsQiimeRtax.scala similarity index 100% rename from 
public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsQiimeRtax.scala rename to gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsQiimeRtax.scala diff --git a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsReport.scala b/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsReport.scala similarity index 100% rename from public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsReport.scala rename to gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsReport.scala diff --git a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSeqCount.scala b/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSeqCount.scala similarity index 100% rename from public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSeqCount.scala rename to gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSeqCount.scala diff --git a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingle.scala b/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingle.scala similarity index 100% rename from public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingle.scala rename to gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingle.scala diff --git a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingleReport.scala b/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingleReport.scala similarity index 100% rename from public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingleReport.scala rename to gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingleReport.scala diff --git a/public/gears/src/test/resources/hpv_simu_R1.krkn.json b/gears/src/test/resources/hpv_simu_R1.krkn.json similarity index 100% rename from public/gears/src/test/resources/hpv_simu_R1.krkn.json rename to gears/src/test/resources/hpv_simu_R1.krkn.json diff --git 
a/public/gears/src/test/resources/log4j.properties b/gears/src/test/resources/log4j.properties similarity index 100% rename from public/gears/src/test/resources/log4j.properties rename to gears/src/test/resources/log4j.properties diff --git a/public/gears/src/test/resources/otu_table.biom b/gears/src/test/resources/otu_table.biom similarity index 100% rename from public/gears/src/test/resources/otu_table.biom rename to gears/src/test/resources/otu_table.biom diff --git a/public/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsKrakenTest.scala b/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsKrakenTest.scala similarity index 100% rename from public/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsKrakenTest.scala rename to gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsKrakenTest.scala diff --git a/public/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsQiimeClosedTest.scala b/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsQiimeClosedTest.scala similarity index 100% rename from public/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsQiimeClosedTest.scala rename to gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsQiimeClosedTest.scala diff --git a/public/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingleTest.scala b/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingleTest.scala similarity index 100% rename from public/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingleTest.scala rename to gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingleTest.scala diff --git a/public/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsTest.scala b/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsTest.scala similarity index 100% rename from public/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsTest.scala rename to 
gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsTest.scala diff --git a/public/generate-indexes/pom.xml b/generate-indexes/pom.xml similarity index 100% rename from public/generate-indexes/pom.xml rename to generate-indexes/pom.xml diff --git a/public/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala b/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala similarity index 98% rename from public/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala rename to generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala index 6e2aa683f8e6e2abe31e2e8307d71db8c41c5258..d7c40fb76197f77ddb944803c113b65cf124a0bf 100644 --- a/public/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala +++ b/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala @@ -30,8 +30,8 @@ import nl.lumc.sasc.biopet.extensions.picard.CreateSequenceDictionary import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFaidx import nl.lumc.sasc.biopet.utils.ConfigUtils import org.broadinstitute.gatk.queue.QScript -import scala.language.reflectiveCalls +import scala.language.reflectiveCalls import scala.collection.JavaConversions._ class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript { @@ -173,7 +173,7 @@ class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript genomeConfig.get("dbsnp_vcf_uri").foreach { dbsnpUri => val cv = new CombineVariants(this) - cv.reference = fastaFile + cv.reference_sequence = fastaFile cv.deps ::= createDict.output def addDownload(uri: String): Unit = { val curl = new Curl(this) @@ -181,7 +181,7 @@ class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript curl.output = new File(annotationDir, new File(curl.url).getName) curl.isIntermediate = true add(curl) - cv.inputFiles ::= curl.output + cv.variant :+= curl.output val 
tabix = new Tabix(this) tabix.input = curl.output @@ -198,7 +198,7 @@ class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript case _ => addDownload(dbsnpUri.toString) } - cv.outputFile = new File(annotationDir, "dbsnp.vcf.gz") + cv.out = new File(annotationDir, "dbsnp.vcf.gz") add(cv) } diff --git a/public/biopet-public-package/.gitignore b/gentrap/.gitignore similarity index 100% rename from public/biopet-public-package/.gitignore rename to gentrap/.gitignore diff --git a/public/gentrap/pom.xml b/gentrap/pom.xml similarity index 100% rename from public/gentrap/pom.xml rename to gentrap/pom.xml diff --git a/public/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/gentrapFront.ssp b/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/gentrapFront.ssp similarity index 100% rename from public/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/gentrapFront.ssp rename to gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/gentrapFront.ssp diff --git a/public/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/measure_plotreport.ssp b/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/measure_plotreport.ssp similarity index 100% rename from public/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/measure_plotreport.ssp rename to gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/measure_plotreport.ssp diff --git a/public/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/scripts/plot_heatmap.R b/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/scripts/plot_heatmap.R similarity index 100% rename from public/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/scripts/plot_heatmap.R rename to gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/scripts/plot_heatmap.R diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala 
b/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala similarity index 100% rename from public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala rename to gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapReport.scala b/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapReport.scala similarity index 100% rename from public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapReport.scala rename to gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapReport.scala diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BaseCounts.scala b/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BaseCounts.scala similarity index 100% rename from public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BaseCounts.scala rename to gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BaseCounts.scala diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksBlind.scala b/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksBlind.scala similarity index 100% rename from public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksBlind.scala rename to gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksBlind.scala diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksGuided.scala b/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksGuided.scala similarity index 100% rename from public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksGuided.scala rename to gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksGuided.scala diff --git 
a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksMeasurement.scala b/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksMeasurement.scala similarity index 100% rename from public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksMeasurement.scala rename to gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksMeasurement.scala diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksStrict.scala b/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksStrict.scala similarity index 100% rename from public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksStrict.scala rename to gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksStrict.scala diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerExon.scala b/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerExon.scala similarity index 100% rename from public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerExon.scala rename to gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerExon.scala diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerGene.scala b/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerGene.scala similarity index 100% rename from public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerGene.scala rename to gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerGene.scala diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala b/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala similarity index 82% 
rename from public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala rename to gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala index b07d295a88d9044f351f738320c23692f53711bf..8f470ee063d85c5c4b14e6e261c4cc1cb323ff2d 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala +++ b/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala @@ -29,6 +29,7 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript => /** * Method to add a bamFile to the pipeline + * * @param id Unique id used for this bam file, most likely to be a sampleName * @param file Location of the bam file */ @@ -51,6 +52,8 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript => require(bamFiles.nonEmpty) } + lazy val mergeCountFiles: Boolean = config("merge_count_files", default = true) + private var extraSummaryFiles: Map[String, File] = Map() def addMergeTableJob(countFiles: List[File], @@ -58,18 +61,22 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript => name: String, fileExtension: String, args: MergeArgs = mergeArgs): Unit = { - add(MergeTables(this, countFiles, outputFile, - args.idCols, args.valCol, args.numHeaderLines, args.fallback, fileExtension = Some(fileExtension))) - extraSummaryFiles += s"${name}_table" -> outputFile + if (mergeCountFiles) { + add(MergeTables(this, countFiles, outputFile, + args.idCols, args.valCol, args.numHeaderLines, args.fallback, fileExtension = Some(fileExtension))) + extraSummaryFiles += s"${name}_table" -> outputFile + } } def addHeatmapJob(countTable: File, outputFile: File, name: String, countType: Option[String] = None): Unit = { - val job = new PlotHeatmap(qscript) - job.input = countTable - job.output = outputFile - job.countType = countType - add(job) - extraSummaryFiles += s"${name}_heatmap" -> outputFile + if 
(mergeCountFiles) { + val job = new PlotHeatmap(qscript) + job.input = countTable + job.output = outputFile + job.countType = countType + add(job) + extraSummaryFiles += s"${name}_heatmap" -> outputFile + } } /** Must return a map with used settings for this pipeline */ diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/scripts/PlotHeatmap.scala b/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/scripts/PlotHeatmap.scala similarity index 100% rename from public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/scripts/PlotHeatmap.scala rename to gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/scripts/PlotHeatmap.scala diff --git a/public/gentrap/src/test/resources/log4j.properties b/gentrap/src/test/resources/log4j.properties similarity index 100% rename from public/gentrap/src/test/resources/log4j.properties rename to gentrap/src/test/resources/log4j.properties diff --git a/public/carp/src/test/resources/ref.dict b/gentrap/src/test/resources/ref.dict similarity index 100% rename from public/carp/src/test/resources/ref.dict rename to gentrap/src/test/resources/ref.dict diff --git a/public/carp/src/test/resources/ref.fa b/gentrap/src/test/resources/ref.fa similarity index 100% rename from public/carp/src/test/resources/ref.fa rename to gentrap/src/test/resources/ref.fa diff --git a/public/carp/src/test/resources/ref.fa.fai b/gentrap/src/test/resources/ref.fa.fai similarity index 100% rename from public/carp/src/test/resources/ref.fa.fai rename to gentrap/src/test/resources/ref.fa.fai diff --git a/public/gentrap/src/test/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapTest.scala b/gentrap/src/test/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapTest.scala similarity index 100% rename from public/gentrap/src/test/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapTest.scala rename to gentrap/src/test/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapTest.scala diff --git 
a/public/gwas-test/pom.xml b/gwas-test/pom.xml similarity index 100% rename from public/gwas-test/pom.xml rename to gwas-test/pom.xml diff --git a/public/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTest.scala b/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTest.scala similarity index 94% rename from public/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTest.scala rename to gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTest.scala index afc6bbdc3ba737db63f5c4270009de0a60b8deaa..d2303ac3e014110652af209dbaee180565405ca0 100644 --- a/public/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTest.scala +++ b/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTest.scala @@ -82,10 +82,10 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R gensToVcf.outputVcf = new File(outputDirGens, gen._1.genotypes.getName + s".${gen._2}.vcf.gz") gensToVcf.isIntermediate = true add(gensToVcf) - cvChr.inputFiles :+= gensToVcf.outputVcf + cvChr.variant :+= gensToVcf.outputVcf } add(cvChr) - cvTotal.inputFiles :+= cvChr.outputFile + cvTotal.variant :+= cvChr.outputFile contig -> cvChr.outputFile } add(cvTotal) @@ -105,14 +105,14 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R bedFile.deleteOnExit() val sv = new SelectVariants(this) - sv.inputFiles :+= chrVcfFiles.getOrElse(region.chr, vcfFile) - sv.outputFile = new File(regionDir, s"$name.vcf.gz") + sv.variant = chrVcfFiles.getOrElse(region.chr, vcfFile) + sv.out = new File(regionDir, s"$name.vcf.gz") sv.intervals :+= bedFile sv.isIntermediate = true add(sv) val snptest = new Snptest(this) - snptest.inputGenotypes :+= sv.outputFile + snptest.inputGenotypes :+= sv.out snptest.inputSampleFiles :+= phenotypeFile snptest.outputFile = Some(new File(regionDir, s"$name.snptest")) add(snptest) @@ -127,7 +127,7 @@ class GwasTest(val root: Configurable) extends 
QScript with BiopetQScript with R } val cv = new CatVariants(this) - cv.inputFiles = snpTests.map(_._2).toList + cv.variant = snpTests.map(_._2).toList cv.outputFile = new File(outputDir, "snptest" + File.separator + "snptest.vcf.gz") add(cv) } diff --git a/public/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/impute/ImputeOutput.scala b/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/impute/ImputeOutput.scala similarity index 100% rename from public/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/impute/ImputeOutput.scala rename to gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/impute/ImputeOutput.scala diff --git a/public/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/impute/Spec.scala b/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/impute/Spec.scala similarity index 100% rename from public/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/impute/Spec.scala rename to gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/impute/Spec.scala diff --git a/public/gwas-test/src/test/resources/fake_chrQ.dict b/gwas-test/src/test/resources/fake_chrQ.dict similarity index 100% rename from public/gwas-test/src/test/resources/fake_chrQ.dict rename to gwas-test/src/test/resources/fake_chrQ.dict diff --git a/public/gwas-test/src/test/resources/fake_chrQ.fa b/gwas-test/src/test/resources/fake_chrQ.fa similarity index 100% rename from public/gwas-test/src/test/resources/fake_chrQ.fa rename to gwas-test/src/test/resources/fake_chrQ.fa diff --git a/public/gwas-test/src/test/resources/fake_chrQ.fa.fai b/gwas-test/src/test/resources/fake_chrQ.fa.fai similarity index 100% rename from public/gwas-test/src/test/resources/fake_chrQ.fa.fai rename to gwas-test/src/test/resources/fake_chrQ.fa.fai diff --git a/public/gwas-test/src/test/resources/log4j.properties b/gwas-test/src/test/resources/log4j.properties similarity index 100% rename from 
public/gwas-test/src/test/resources/log4j.properties rename to gwas-test/src/test/resources/log4j.properties diff --git a/public/gwas-test/src/test/resources/specs/files.specs b/gwas-test/src/test/resources/specs/files.specs similarity index 100% rename from public/gwas-test/src/test/resources/specs/files.specs rename to gwas-test/src/test/resources/specs/files.specs diff --git a/public/gwas-test/src/test/resources/specs/test.gens b/gwas-test/src/test/resources/specs/test.gens similarity index 100% rename from public/gwas-test/src/test/resources/specs/test.gens rename to gwas-test/src/test/resources/specs/test.gens diff --git a/public/gwas-test/src/test/resources/specs/test.gens_info b/gwas-test/src/test/resources/specs/test.gens_info similarity index 100% rename from public/gwas-test/src/test/resources/specs/test.gens_info rename to gwas-test/src/test/resources/specs/test.gens_info diff --git a/public/gwas-test/src/test/resources/specs/test.gens_info_by_sample b/gwas-test/src/test/resources/specs/test.gens_info_by_sample similarity index 100% rename from public/gwas-test/src/test/resources/specs/test.gens_info_by_sample rename to gwas-test/src/test/resources/specs/test.gens_info_by_sample diff --git a/public/gwas-test/src/test/resources/specs/test.gens_summary b/gwas-test/src/test/resources/specs/test.gens_summary similarity index 100% rename from public/gwas-test/src/test/resources/specs/test.gens_summary rename to gwas-test/src/test/resources/specs/test.gens_summary diff --git a/public/gwas-test/src/test/resources/specs/test.gens_warnings b/gwas-test/src/test/resources/specs/test.gens_warnings similarity index 100% rename from public/gwas-test/src/test/resources/specs/test.gens_warnings rename to gwas-test/src/test/resources/specs/test.gens_warnings diff --git a/public/gwas-test/src/test/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTestTest.scala b/gwas-test/src/test/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTestTest.scala similarity index 100% rename 
from public/gwas-test/src/test/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTestTest.scala rename to gwas-test/src/test/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTestTest.scala diff --git a/public/carp/.gitignore b/kopisu/.gitignore similarity index 100% rename from public/carp/.gitignore rename to kopisu/.gitignore diff --git a/public/kopisu/pom.xml b/kopisu/pom.xml similarity index 100% rename from public/kopisu/pom.xml rename to kopisu/pom.xml diff --git a/public/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferPipeline.scala b/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferPipeline.scala similarity index 100% rename from public/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferPipeline.scala rename to kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferPipeline.scala diff --git a/public/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferSummary.scala b/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferSummary.scala similarity index 100% rename from public/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferSummary.scala rename to kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferSummary.scala diff --git a/public/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/Kopisu.scala b/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/Kopisu.scala similarity index 100% rename from public/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/Kopisu.scala rename to kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/Kopisu.scala diff --git a/public/kopisu/src/test/resources/log4j.properties b/kopisu/src/test/resources/log4j.properties similarity index 100% rename from public/kopisu/src/test/resources/log4j.properties rename to kopisu/src/test/resources/log4j.properties diff --git a/public/flexiprep/.gitignore b/mapping/.gitignore similarity index 100% rename from public/flexiprep/.gitignore rename to 
mapping/.gitignore diff --git a/public/mapping/pom.xml b/mapping/pom.xml similarity index 100% rename from public/mapping/pom.xml rename to mapping/pom.xml diff --git a/public/mapping/src/main/resources/nl/lumc/sasc/biopet/pipelines/mapping/mappingFront.ssp b/mapping/src/main/resources/nl/lumc/sasc/biopet/pipelines/mapping/mappingFront.ssp similarity index 100% rename from public/mapping/src/main/resources/nl/lumc/sasc/biopet/pipelines/mapping/mappingFront.ssp rename to mapping/src/main/resources/nl/lumc/sasc/biopet/pipelines/mapping/mappingFront.ssp diff --git a/public/mapping/src/main/resources/nl/lumc/sasc/biopet/pipelines/mapping/multisampleMappingFront.ssp b/mapping/src/main/resources/nl/lumc/sasc/biopet/pipelines/mapping/multisampleMappingFront.ssp similarity index 100% rename from public/mapping/src/main/resources/nl/lumc/sasc/biopet/pipelines/mapping/multisampleMappingFront.ssp rename to mapping/src/main/resources/nl/lumc/sasc/biopet/pipelines/mapping/multisampleMappingFront.ssp diff --git a/public/mapping/src/main/resources/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp b/mapping/src/main/resources/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp similarity index 100% rename from public/mapping/src/main/resources/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp rename to mapping/src/main/resources/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp diff --git a/public/mapping/src/main/resources/nl/lumc/sasc/biopet/pipelines/mapping/scripts/tophat-recondition.py b/mapping/src/main/resources/nl/lumc/sasc/biopet/pipelines/mapping/scripts/tophat-recondition.py similarity index 100% rename from public/mapping/src/main/resources/nl/lumc/sasc/biopet/pipelines/mapping/scripts/tophat-recondition.py rename to mapping/src/main/resources/nl/lumc/sasc/biopet/pipelines/mapping/scripts/tophat-recondition.py diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala 
b/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala similarity index 100% rename from public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala rename to mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingReport.scala b/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingReport.scala similarity index 95% rename from public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingReport.scala rename to mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingReport.scala index 3fc93747477ed985f4e5ccf02f2745513f101817..14606ca71b21171800ebf6d1da9d42f4f89f98b5 100644 --- a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingReport.scala +++ b/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingReport.scala @@ -33,7 +33,7 @@ object MappingReport extends ReportBuilder { /** Name of report */ val reportName = "Mapping Report" - override def extFiles = super.extFiles ++ List("js/gears.js") + override def extFiles = super.extFiles ++ List("js/gears.js", "js/krona-2.0.js", "img/krona/loading.gif", "img/krona/hidden.png", "img/krona/favicon.ico") .map(x => ExtFile("/nl/lumc/sasc/biopet/pipelines/gears/report/ext/" + x, x)) def krakenExecuted = summary.getValue(sampleId, libId, "gears", "stats", "krakenreport").isDefined diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingReport.scala b/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingReport.scala similarity index 100% rename from public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingReport.scala rename to mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingReport.scala diff --git 
a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingTrait.scala b/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingTrait.scala similarity index 100% rename from public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingTrait.scala rename to mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingTrait.scala diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/scripts/TophatRecondition.scala b/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/scripts/TophatRecondition.scala similarity index 100% rename from public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/scripts/TophatRecondition.scala rename to mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/scripts/TophatRecondition.scala diff --git a/public/mapping/src/test/resources/log4j.properties b/mapping/src/test/resources/log4j.properties similarity index 100% rename from public/mapping/src/test/resources/log4j.properties rename to mapping/src/test/resources/log4j.properties diff --git a/public/mapping/src/test/resources/ref.1.bt2 b/mapping/src/test/resources/ref.1.bt2 similarity index 100% rename from public/mapping/src/test/resources/ref.1.bt2 rename to mapping/src/test/resources/ref.1.bt2 diff --git a/public/mapping/src/test/resources/ref.1.ebwt b/mapping/src/test/resources/ref.1.ebwt similarity index 100% rename from public/mapping/src/test/resources/ref.1.ebwt rename to mapping/src/test/resources/ref.1.ebwt diff --git a/public/gentrap/src/test/resources/ref.dict b/mapping/src/test/resources/ref.dict similarity index 100% rename from public/gentrap/src/test/resources/ref.dict rename to mapping/src/test/resources/ref.dict diff --git a/public/gentrap/src/test/resources/ref.fa b/mapping/src/test/resources/ref.fa similarity index 100% rename from public/gentrap/src/test/resources/ref.fa rename to mapping/src/test/resources/ref.fa diff 
--git a/public/gentrap/src/test/resources/ref.fa.fai b/mapping/src/test/resources/ref.fa.fai similarity index 100% rename from public/gentrap/src/test/resources/ref.fa.fai rename to mapping/src/test/resources/ref.fa.fai diff --git a/public/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala b/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala similarity index 100% rename from public/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala rename to mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala diff --git a/pom.xml b/pom.xml index e383c77a4d26cee678f1c3446134c8582c60b28b..a7d24f0670a268d3aa738254ded8ba8006f58ee9 100644 --- a/pom.xml +++ b/pom.xml @@ -1,22 +1,280 @@ <?xml version="1.0" encoding="UTF-8"?> -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> +<!-- + + Biopet is built on top of GATK Queue for building bioinformatic + pipelines. It is mainly intended to support LUMC SHARK cluster which is running + SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + should also be able to execute Biopet tools and pipelines. + + Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + + Contact us at: sasc@lumc.nl + + A dual licensing mode is applied. The source code within this project that are + not part of GATK Queue is freely available for non-commercial use under an AGPL + license; For commercial users or users who do not want to follow the AGPL + license, please contact us to obtain a separate license. 
+ +--> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> - <artifactId>BiopetRoot</artifactId> + <artifactId>Biopet</artifactId> + <groupId>nl.lumc.sasc</groupId> + <name>Biopet</name> <packaging>pom</packaging> - <name>BiopetRoot</name> - - <parent> - <groupId>nl.lumc.sasc</groupId> - <artifactId>Biopet</artifactId> - <version>0.7.0-SNAPSHOT</version> - <relativePath>public</relativePath> - </parent> + <version>0.7.0-SNAPSHOT</version> <modules> - <module>public</module> - <module>protected</module> + <module>biopet-package</module> + <module>bam2wig</module> + <module>bammetrics</module> + <module>basty</module> + <module>carp</module> + <module>flexiprep</module> + <module>gears</module> + <module>generate-indexes</module> + <module>gentrap</module> + <module>kopisu</module> + <module>mapping</module> + <module>sage</module> + <module>shiva</module> + <module>tinycap</module> + <module>toucan</module> + <module>biopet-core</module> + <module>biopet-utils</module> + <module>biopet-tools</module> + <module>biopet-tools-extensions</module> + <module>biopet-extensions</module> + <module>biopet-tools-package</module> + <module>gwas-test</module> <module>external-example</module> - <!--<module>biopet-aggregate</module>--> </modules> + + <properties> + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> + <scoverage.plugin.version>1.1.1</scoverage.plugin.version> + <scalaVersion>2.10.4</scalaVersion> + <scoverage.aggregate>true</scoverage.aggregate> + </properties> + + <build> + <sourceDirectory>${basedir}/src/main/scala</sourceDirectory> + <testSourceDirectory>${basedir}/src/test/scala</testSourceDirectory> + <testResources> + <testResource> + <directory>${basedir}/src/test/resources</directory> + <includes> + <include>**/*</include> + </includes> + </testResource> 
+ </testResources> + <resources> + <resource> + <directory>${basedir}/src/main/resources</directory> + <includes> + <include>**/*</include> + </includes> + </resource> + </resources> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-surefire-plugin</artifactId> + <version>2.18.1</version> + <configuration> + <forkCount>1C</forkCount> + <argLine>-Xmx300m</argLine> + <workingDirectory>${project.build.directory}</workingDirectory> + </configuration> + </plugin> + <plugin> + <artifactId>maven-dependency-plugin</artifactId> + <version>2.10</version> + <executions> + <execution> + <id>copy-installed</id> + <phase>prepare-package</phase> + <goals> + <goal>list</goal> + </goals> + <configuration> + <outputFile>${project.build.outputDirectory}/dependency_list.txt</outputFile> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>net.alchim31.maven</groupId> + <artifactId>scala-maven-plugin</artifactId> + <version>3.2.0</version> + <executions> + <execution> + <id>scala-compile</id> + <goals> + <goal>compile</goal> + <goal>testCompile</goal> + </goals> + <configuration> + <args> + <arg>-dependencyfile</arg> + <arg>${project.build.directory}/.scala_dependencies</arg> + <arg>-deprecation</arg> + <arg>-feature</arg> + </args> + </configuration> + </execution> + </executions> + <!-- ... (see other usage or goals for details) ... 
--> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-jar-plugin</artifactId> + <version>2.5</version> + <executions> + <execution> + <goals> + <goal>test-jar</goal> + </goals> + </execution> + </executions> + <configuration> + <archive> + <manifest> + <addDefaultImplementationEntries>true</addDefaultImplementationEntries> + <addDefaultSpecificationEntries>true</addDefaultSpecificationEntries> + </manifest> + </archive> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + <version>2.3.2</version> + <configuration> + <showDeprecation>true</showDeprecation> + </configuration> + </plugin> + <plugin> + <groupId>org.scalariform</groupId> + <artifactId>scalariform-maven-plugin</artifactId> + <version>0.1.4</version> + <executions> + <execution> + <phase>process-sources</phase> + <goals> + <goal>format</goal> + </goals> + <configuration> + <baseDir>${basedir}/src</baseDir> + <rewriteArrowSymbols>false</rewriteArrowSymbols> + <alignParameters>true</alignParameters> + <alignSingleLineCaseStatements_maxArrowIndent>40 + </alignSingleLineCaseStatements_maxArrowIndent> + <alignSingleLineCaseStatements>true</alignSingleLineCaseStatements> + <compactStringConcatenation>false</compactStringConcatenation> + <compactControlReadability>false</compactControlReadability> + <doubleIndentClassDeclaration>false</doubleIndentClassDeclaration> + <formatXml>true</formatXml> + <indentLocalDefs>false</indentLocalDefs> + <indentPackageBlocks>true</indentPackageBlocks> + <indentSpaces>2</indentSpaces> + <placeScaladocAsterisksBeneathSecondAsterisk>false + </placeScaladocAsterisksBeneathSecondAsterisk> + <preserveDanglingCloseParenthesis>true</preserveDanglingCloseParenthesis> + <preserveSpaceBeforeArguments>false</preserveSpaceBeforeArguments> + <rewriteArrowSymbols>false</rewriteArrowSymbols> + <spaceBeforeColon>false</spaceBeforeColon> + 
<spaceInsideBrackets>false</spaceInsideBrackets> + <spaceInsideParentheses>false</spaceInsideParentheses> + <spacesWithinPatternBinders>true</spacesWithinPatternBinders> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>pl.project13.maven</groupId> + <artifactId>git-commit-id-plugin</artifactId> + <version>2.1.10</version> + <executions> + <execution> + <goals> + <goal>revision</goal> + </goals> + </execution> + </executions> + <configuration> + <prefix>git</prefix> + <dateFormat>dd.MM.yyyy '@' HH:mm:ss z</dateFormat> + <verbose>false</verbose> + <dotGitDirectory>${basedir}/../../.git</dotGitDirectory> + <useNativeGit>true</useNativeGit> + <skipPoms>false</skipPoms> + <generateGitPropertiesFile>true</generateGitPropertiesFile> + <generateGitPropertiesFilename>src/main/resources/git.properties</generateGitPropertiesFilename> + <failOnNoGitDirectory>false</failOnNoGitDirectory> + <abbrevLength>8</abbrevLength> + <skip>false</skip> + <gitDescribe> + <skip>false</skip> + <always>false</always> + <abbrev>8</abbrev> + <dirty>-dirty</dirty> + <forceLongFormat>false</forceLongFormat> + </gitDescribe> + </configuration> + </plugin> + <plugin> + <groupId>com.mycila</groupId> + <artifactId>license-maven-plugin</artifactId> + <version>2.6</version> + <configuration> + <excludes> + <exclude>**/*git*</exclude> + <exclude>**/License*</exclude> + <exclude>**/*.bam</exclude> + <exclude>**/*.bai</exclude> + <exclude>**/*.gtf</exclude> + <exclude>**/*.fq</exclude> + <exclude>**/*.sam</exclude> + <exclude>**/*.bed</exclude> + <exclude>**/*.refFlat</exclude> + <exclude>**/*.R</exclude> + <exclude>**/*.rscript</exclude> + </excludes> + </configuration> + </plugin> + <plugin> + <groupId>org.scoverage</groupId> + <artifactId>scoverage-maven-plugin</artifactId> + <version>${scoverage.plugin.version}</version> + <configuration> + <scalaVersion>${scalaVersion}</scalaVersion> + <aggregate>true</aggregate> + <highlighting>true</highlighting> + 
<aggregate>true</aggregate> + <!-- other parameters --> + </configuration> + </plugin> + </plugins> + </build> + <reporting> + <plugins> + <plugin> + <groupId>org.scoverage</groupId> + <artifactId>scoverage-maven-plugin</artifactId> + <version>${scoverage.plugin.version}</version> + <configuration> + <aggregate>true</aggregate> <!-- for aggregated report --> + <highlighting>true</highlighting> + </configuration> + <reportSets> + <reportSet> + <reports> + <report>report</report> <!-- select only one report from: report, integration-report and report-only reporters --> + </reports> + </reportSet> + </reportSets> + </plugin> + </plugins> + </reporting> </project> diff --git a/protected/LICENSE b/protected/LICENSE deleted file mode 120000 index b1df9c9b08f76e7d2dc7fc8bed9e01802893b538..0000000000000000000000000000000000000000 --- a/protected/LICENSE +++ /dev/null @@ -1 +0,0 @@ -biopet-gatk-extensions/src/main/resources/nl/lumc/sasc/biopet/License.txt \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/pom.xml b/protected/biopet-gatk-extensions/pom.xml deleted file mode 100644 index 667dd080caa402dbc3168a73ed77be40c64f30f8..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-extensions/pom.xml +++ /dev/null @@ -1,37 +0,0 @@ -<!-- - - Due to the license issue with GATK, this part of Biopet can only be used inside the - LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - on how to use this protected part of biopet or contact us at sasc@lumc.nl - ---> -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - - <artifactId>BiopetGatkExtensions</artifactId> - <packaging>jar</packaging> - - <parent> - <groupId>nl.lumc.sasc</groupId> - <artifactId>BiopetGatk</artifactId> - <version>0.7.0-SNAPSHOT</version> - <relativePath>../</relativePath> - </parent> - - <inceptionYear>2014</inceptionYear> - <name>BiopetGatkExtensions</name> - - <dependencies> - <dependency> - <groupId>nl.lumc.sasc</groupId> - <artifactId>BiopetExtensions</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>org.broadinstitute.gatk</groupId> - <artifactId>gatk-queue-extensions-distribution</artifactId> - <version>3.5</version> - </dependency> - </dependencies> -</project> diff --git a/protected/biopet-gatk-extensions/src/main/resources/nl/lumc/sasc/biopet/License.txt b/protected/biopet-gatk-extensions/src/main/resources/nl/lumc/sasc/biopet/License.txt deleted file mode 100644 index b38db2c756a4e05f7769dc6ae3d4f18ad7070dab..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-extensions/src/main/resources/nl/lumc/sasc/biopet/License.txt +++ /dev/null @@ -1,3 +0,0 @@ -Due to the license issue with GATK, this part of Biopet can only be used inside the -LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions -on how to use this protected part of biopet or contact us at sasc@lumc.nl diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/AnalyzeCovariates.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/AnalyzeCovariates.scala deleted file mode 100644 index 277390751529e743644dce7a2d9396b8d10b1228..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/AnalyzeCovariates.scala +++ /dev/null @@ -1,23 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.extensions.gatk.broad - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable - -class AnalyzeCovariates(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.AnalyzeCovariates with GatkGeneral { -} - -object AnalyzeCovariates { - def apply(root: Configurable, before: File, after: File, plots: File): AnalyzeCovariates = { - val ac = new AnalyzeCovariates(root) - ac.before = before - ac.after = after - ac.plots = plots - ac - } -} \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/ApplyRecalibration.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/ApplyRecalibration.scala deleted file mode 100644 index 8b8ea7d5a97ca2095c981b1b11a3bae6496583a8..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/ApplyRecalibration.scala +++ /dev/null @@ -1,44 +0,0 @@ -/** - * Due to the 
license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.extensions.gatk.broad - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable - -class ApplyRecalibration(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.ApplyRecalibration with GatkGeneral { - scatterCount = config("scattercount", default = 0) - - override val defaultThreads = 3 - - override def freezeFieldValues() { - super.freezeFieldValues() - - nt = Option(getThreads) - memoryLimit = Option(nt.getOrElse(1) * 2) - - import org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode - if (mode == Mode.INDEL) ts_filter_level = config("ts_filter_level", default = 99.0) - else if (mode == Mode.SNP) ts_filter_level = config("ts_filter_level", default = 99.5) - ts_filter_level = config("ts_filter_level") - } -} - -object ApplyRecalibration { - def apply(root: Configurable, input: File, output: File, recal_file: File, tranches_file: File, indel: Boolean = false): ApplyRecalibration = { - val ar = if (indel) new ApplyRecalibration(root) { - mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL - } - else new ApplyRecalibration(root) { - mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP - } - ar.input :+= input - ar.recal_file = recal_file - ar.tranches_file = tranches_file - ar.out = output - ar - } -} \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BaseRecalibrator.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BaseRecalibrator.scala deleted file mode 
100644 index d51a28375b07aea9d3462f9d2b5f185fc2f4b629..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BaseRecalibrator.scala +++ /dev/null @@ -1,25 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.extensions.gatk.broad - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable - -class BaseRecalibrator(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.BaseRecalibrator with GatkGeneral { - if (config.contains("scattercount")) scatterCount = config("scattercount", default = 1) - if (config.contains("dbsnp")) knownSites :+= new File(config("dbsnp").asString) - if (config.contains("known_sites")) knownSites :+= new File(config("known_sites").asString) -} - -object BaseRecalibrator { - def apply(root: Configurable, input: File, output: File): BaseRecalibrator = { - val br = new BaseRecalibrator(root) - br.input_file :+= input - br.out = output - br - } -} \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CombineGVCFs.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CombineGVCFs.scala deleted file mode 100644 index 138067f1679f3adbeae3d3dc366ea3ecf6355df6..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CombineGVCFs.scala +++ /dev/null @@ -1,23 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.extensions.gatk.broad - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable - -class CombineGVCFs(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.CombineGVCFs with GatkGeneral { - if (config.contains("scattercount")) scatterCount = config("scattercount") -} - -object CombineGVCFs { - def apply(root: Configurable, input: List[File], output: File): CombineGVCFs = { - val cg = new CombineGVCFs(root) - cg.variant = input - cg.o = output - cg - } -} \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CombineVariants.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CombineVariants.scala deleted file mode 100644 index b811327b9cb154277b3efb7487a1a2085ae4b9d4..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CombineVariants.scala +++ /dev/null @@ -1,23 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.extensions.gatk.broad - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable - -class CombineVariants(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.CombineVariants with GatkGeneral { - if (config.contains("scattercount")) scatterCount = config("scattercount") -} - -object CombineVariants { - def apply(root: Configurable, input: List[File], output: File): CombineVariants = { - val cv = new CombineVariants(root) - cv.variant = input - cv.out = output - cv - } -} \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala deleted file mode 100644 index 30ec0d09721ee7d06c69da3fffa3681579b7cb19..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala +++ /dev/null @@ -1,46 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.extensions.gatk.broad - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Gather, Output } - -class GenotypeGVCFs(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.GenotypeGVCFs with GatkGeneral { - - @Gather(enabled = false) - @Output(required = false) - protected var vcfIndex: File = _ - - annotation ++= config("annotation", default = Seq(), freeVar = false).asStringList - - if (config.contains("dbsnp")) dbsnp = config("dbsnp") - if (config.contains("scattercount", "genotypegvcfs")) scatterCount = config("scattercount") - - if (config("inputtype", default = "dna").asString == "rna") { - stand_call_conf = config("stand_call_conf", default = 20) - stand_emit_conf = config("stand_emit_conf", default = 0) - } else { - stand_call_conf = config("stand_call_conf", default = 30) - stand_emit_conf = config("stand_emit_conf", default = 0) - } - - override def freezeFieldValues(): Unit = { - super.freezeFieldValues() - if (out.getName.endsWith(".vcf.gz")) vcfIndex = new File(out.getAbsolutePath + ".tbi") - } -} - -object GenotypeGVCFs { - def apply(root: Configurable, gvcfFiles: List[File], output: File): GenotypeGVCFs = { - val gg = new GenotypeGVCFs(root) - gg.variant = gvcfFiles - gg.out = output - if (gg.out.getName.endsWith(".vcf.gz")) gg.vcfIndex = new File(gg.out.getAbsolutePath + ".tbi") - gg - } -} \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/HaplotypeCaller.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/HaplotypeCaller.scala deleted file mode 100644 index d7a07538c9cfcff82786c0d09d38001899783af5..0000000000000000000000000000000000000000 
--- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/HaplotypeCaller.scala +++ /dev/null @@ -1,76 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.extensions.gatk.broad - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Gather, Output } -import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType - -class HaplotypeCaller(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.HaplotypeCaller with GatkGeneral { - - @Gather(enabled = false) - @Output(required = false) - protected var vcfIndex: File = _ - - override val defaultThreads = 1 - - min_mapping_quality_score = config("minMappingQualityScore", default = 20) - scatterCount = config("scattercount", default = 1) - if (config.contains("dbsnp")) this.dbsnp = config("dbsnp") - this.sample_ploidy = config("ploidy") - if (config.contains("bamOutput")) bamOutput = config("bamOutput") - if (config.contains("allSitePLs")) allSitePLs = config("allSitePLs") - if (config.contains("output_mode")) { - import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode._ - config("output_mode").asString match { - case "EMIT_ALL_CONFIDENT_SITES" => output_mode = EMIT_ALL_CONFIDENT_SITES - case "EMIT_ALL_SITES" => output_mode = EMIT_ALL_SITES - case "EMIT_VARIANTS_ONLY" => output_mode = EMIT_VARIANTS_ONLY - case e => logger.warn("output mode '" + e + "' does not exist") - } - } - - if (config("inputtype", default = "dna").asString == "rna") { - dontUseSoftClippedBases = config("dontusesoftclippedbases", default = true) - stand_call_conf = config("stand_call_conf", default = 5) - stand_emit_conf = config("stand_emit_conf", default = 0) - } 
else { - dontUseSoftClippedBases = config("dontusesoftclippedbases", default = false) - stand_call_conf = config("stand_call_conf", default = 5) - stand_emit_conf = config("stand_emit_conf", default = 0) - } - - override def freezeFieldValues() { - super.freezeFieldValues() - if (out.getName.endsWith(".vcf.gz")) vcfIndex = new File(out.getAbsolutePath + ".tbi") - if (bamOutput != null && nct.getOrElse(1) > 1) { - logger.warn("BamOutput is on, nct/threads is forced to set on 1, this option is only for debug") - nCoresRequest = Some(1) - } - nct = Some(getThreads) - memoryLimit = Option(memoryLimit.getOrElse(2.0) * nct.getOrElse(1)) - } -} - -object HaplotypeCaller { - def apply(root: Configurable, inputFiles: List[File], outputFile: File): HaplotypeCaller = { - val hc = new HaplotypeCaller(root) - hc.input_file = inputFiles - hc.out = outputFile - if (hc.out.getName.endsWith(".vcf.gz")) hc.vcfIndex = new File(hc.out.getAbsolutePath + ".tbi") - hc - } - - def gvcf(root: Configurable, inputFile: File, outputFile: File): HaplotypeCaller = { - val hc = apply(root, List(inputFile), outputFile) - hc.emitRefConfidence = org.broadinstitute.gatk.tools.walkers.haplotypecaller.ReferenceConfidenceMode.GVCF - hc.variant_index_type = GATKVCFIndexType.LINEAR - hc.variant_index_parameter = Some(hc.config("variant_index_parameter", default = 128000).asInt) - hc - } -} \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/IndelRealigner.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/IndelRealigner.scala deleted file mode 100644 index 868b6ed0a62f87cf169571debd16a125ce8e9fc9..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/IndelRealigner.scala +++ /dev/null @@ -1,31 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * 
LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.extensions.gatk.broad - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Gather, Output } - -class IndelRealigner(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.IndelRealigner with GatkGeneral { - - @Gather(enabled = false) - @Output - protected var bamIndex: File = _ - - if (config.contains("scattercount")) scatterCount = config("scattercount") -} - -object IndelRealigner { - def apply(root: Configurable, input: File, targetIntervals: File, outputDir: File): IndelRealigner = { - val ir = new IndelRealigner(root) - ir.input_file :+= input - ir.targetIntervals = targetIntervals - ir.out = new File(outputDir, input.getName.stripSuffix(".bam") + ".realign.bam") - ir.bamIndex = new File(outputDir, input.getName.stripSuffix(".bam") + ".realign.bai") - ir - } -} \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/PrintReads.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/PrintReads.scala deleted file mode 100644 index 554208c3af1d791c71d97a17087f02321657de8c..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/PrintReads.scala +++ /dev/null @@ -1,23 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.extensions.gatk.broad - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable - -class PrintReads(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.PrintReads with GatkGeneral { - if (config.contains("scattercount")) scatterCount = config("scattercount") -} - -object PrintReads { - def apply(root: Configurable, input: File, output: File): PrintReads = { - val br = new PrintReads(root) - br.input_file :+= input - br.out = output - br - } -} \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/RealignerTargetCreator.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/RealignerTargetCreator.scala deleted file mode 100644 index a884e83781227ce2b39b3f98573c3d89f8129d1b..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/RealignerTargetCreator.scala +++ /dev/null @@ -1,25 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.extensions.gatk.broad - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable - -class RealignerTargetCreator(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.RealignerTargetCreator with GatkGeneral { - if (config.contains("scattercount")) scatterCount = config("scattercount") - - if (config.contains("known")) known ++= config("known").asFileList -} - -object RealignerTargetCreator { - def apply(root: Configurable, input: File, outputDir: File): RealignerTargetCreator = { - val re = new RealignerTargetCreator(root) - re.input_file :+= input - re.out = new File(outputDir, input.getName.stripSuffix(".bam") + ".realign.intervals") - re - } -} \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/SelectVariants.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/SelectVariants.scala deleted file mode 100644 index abb27c5fc34d73ab62ffae928e622d6cda64c4d9..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/SelectVariants.scala +++ /dev/null @@ -1,23 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.extensions.gatk.broad - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable - -class SelectVariants(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.SelectVariants with GatkGeneral { - if (config.contains("scattercount")) scatterCount = config("scattercount") -} - -object SelectVariants { - def apply(root: Configurable, input: File, output: File): SelectVariants = { - val sv = new SelectVariants(root) - sv.variant = input - sv.out = output - sv - } -} \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/UnifiedGenotyper.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/UnifiedGenotyper.scala deleted file mode 100644 index 71e00e0512dee4a5d7f12744d729a8166ca18fe0..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/UnifiedGenotyper.scala +++ /dev/null @@ -1,57 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.extensions.gatk.broad - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Gather, Output } - -class UnifiedGenotyper(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.UnifiedGenotyper with GatkGeneral { - - @Gather(enabled = false) - @Output(required = false) - protected var vcfIndex: File = _ - - if (config.contains("scattercount")) scatterCount = config("scattercount") - if (config.contains("dbsnp")) this.dbsnp = config("dbsnp") - sample_ploidy = config("ploidy") - if (config.contains("allSitePLs")) this.allSitePLs = config("allSitePLs") - - stand_call_conf = config("stand_call_conf", default = 5) - stand_emit_conf = config("stand_emit_conf", default = 0) - - if (config.contains("output_mode")) { - import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode._ - config("output_mode").asString match { - case "EMIT_ALL_CONFIDENT_SITES" => output_mode = EMIT_ALL_CONFIDENT_SITES - case "EMIT_ALL_SITES" => output_mode = EMIT_ALL_SITES - case "EMIT_VARIANTS_ONLY" => output_mode = EMIT_VARIANTS_ONLY - case e => logger.warn("output mode '" + e + "' does not exist") - } - } - - override val defaultThreads = 1 - - override def freezeFieldValues() { - super.freezeFieldValues() - - genotype_likelihoods_model = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.BOTH - nct = Some(getThreads) - memoryLimit = Option(nct.getOrElse(1) * memoryLimit.getOrElse(2.0)) - } -} - -object UnifiedGenotyper { - def apply(root: Configurable, inputFiles: List[File], outputFile: File): UnifiedGenotyper = { - val ug = new UnifiedGenotyper(root) - ug.input_file = inputFiles - ug.out = outputFile - if (ug.out.getName.endsWith(".vcf.gz")) ug.vcfIndex = new 
File(ug.out.getAbsolutePath + ".tbi") - ug - } - -} \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantAnnotator.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantAnnotator.scala deleted file mode 100644 index b26549622f2ee5131538baf8cc2882416bf3ae33..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantAnnotator.scala +++ /dev/null @@ -1,25 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.extensions.gatk.broad - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable - -class VariantAnnotator(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.VariantAnnotator with GatkGeneral { - if (config.contains("scattercount")) scatterCount = config("scattercount") - dbsnp = config("dbsnp") -} - -object VariantAnnotator { - def apply(root: Configurable, input: File, bamFiles: List[File], output: File): VariantAnnotator = { - val va = new VariantAnnotator(root) - va.variant = input - va.input_file = bamFiles - va.out = output - va - } -} \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantEval.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantEval.scala deleted file mode 100644 index fdb6e7e27b08793f44fb94d82c5ceef0033d4ed5..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantEval.scala +++ /dev/null @@ -1,38 +0,0 @@ -/** - * 
Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.extensions.gatk.broad - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable - -class VariantEval(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.VariantEval with GatkGeneral { -} - -object VariantEval { - def apply(root: Configurable, sample: File, compareWith: File, - output: File): VariantEval = { - val vareval = new VariantEval(root) - vareval.eval = Seq(sample) - vareval.comp = Seq(compareWith) - vareval.out = output - vareval - } - - def apply(root: Configurable, sample: File, compareWith: File, - output: File, ST: Seq[String], EV: Seq[String]): VariantEval = { - val vareval = new VariantEval(root) - vareval.eval = Seq(sample) - vareval.comp = Seq(compareWith) - vareval.out = output - vareval.noST = true - vareval.ST = ST - vareval.noEV = true - vareval.EV = EV - vareval - } - -} \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantRecalibrator.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantRecalibrator.scala deleted file mode 100644 index e4fdf3d2d26875e7795802fbda50c00ac118c875..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantRecalibrator.scala +++ /dev/null @@ -1,46 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.extensions.gatk.broad - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile - -class VariantRecalibrator(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.VariantRecalibrator with GatkGeneral { - override val defaultThreads = 4 - - nt = Option(getThreads) - memoryLimit = Option(nt.getOrElse(1) * 2) - - if (config.contains("dbsnp")) resource :+= new TaggedFile(config("dbsnp").asString, "known=true,training=false,truth=false,prior=2.0") - - an = config("annotation", default = List("QD", "DP", "FS", "ReadPosRankSum", "MQRankSum")).asStringList - minNumBadVariants = config("minnumbadvariants") - maxGaussians = config("maxgaussians") -} - -object VariantRecalibrator { - def apply(root: Configurable, input: File, recal_file: File, tranches_file: File, indel: Boolean = false): VariantRecalibrator = { - val vr = new VariantRecalibrator(root) { - override lazy val configNamespace = "variantrecalibrator" - override def configPath: List[String] = (if (indel) "indel" else "snp") :: super.configPath - if (indel) { - mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL - if (config.contains("mills")) resource :+= new TaggedFile(config("mills").asString, "known=false,training=true,truth=true,prior=12.0") - } else { - mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP - if (config.contains("hapmap")) resource +:= new TaggedFile(config("hapmap").asString, "known=false,training=true,truth=true,prior=15.0") - if (config.contains("omni")) resource +:= new TaggedFile(config("omni").asString, "known=false,training=true,truth=true,prior=12.0") - if 
(config.contains("1000G")) resource +:= new TaggedFile(config("1000G").asString, "known=false,training=true,truth=false,prior=10.0") - } - } - vr.input :+= input - vr.recal_file = recal_file - vr.tranches_file = tranches_file - vr - } -} \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/test/resources/log4j.properties b/protected/biopet-gatk-extensions/src/test/resources/log4j.properties deleted file mode 100644 index 52fb824b0a8088346ed39f9de816309d0569ecf6..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-extensions/src/test/resources/log4j.properties +++ /dev/null @@ -1,15 +0,0 @@ -# -# Due to the license issue with GATK, this part of Biopet can only be used inside the -# LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions -# on how to use this protected part of biopet or contact us at sasc@lumc.nl -# - -# Set root logger level to DEBUG and its only appender to A1. -log4j.rootLogger=ERROR, A1 - -# A1 is set to be a ConsoleAppender. -log4j.appender.A1=org.apache.log4j.ConsoleAppender - -# A1 uses PatternLayout. -log4j.appender.A1.layout=org.apache.log4j.PatternLayout -log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/protected/biopet-gatk-pipelines/pom.xml b/protected/biopet-gatk-pipelines/pom.xml deleted file mode 100644 index f1f5df4c43b52e1219541c5907069654d5086843..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-pipelines/pom.xml +++ /dev/null @@ -1,64 +0,0 @@ -<!-- - - Due to the license issue with GATK, this part of Biopet can only be used inside the - LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - on how to use this protected part of biopet or contact us at sasc@lumc.nl - ---> -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - - <artifactId>BiopetGatkPipelines</artifactId> - <packaging>jar</packaging> - - <parent> - <groupId>nl.lumc.sasc</groupId> - <artifactId>BiopetGatk</artifactId> - <version>0.7.0-SNAPSHOT</version> - <relativePath>../</relativePath> - </parent> - - <inceptionYear>2014</inceptionYear> - <name>BiopetGatkPipelines</name> - - <dependencies> - <dependency> - <groupId>nl.lumc.sasc</groupId> - <artifactId>BiopetCore</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>nl.lumc.sasc</groupId> - <artifactId>BiopetGatkExtensions</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>nl.lumc.sasc</groupId> - <artifactId>Mapping</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>nl.lumc.sasc</groupId> - <artifactId>Basty</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>nl.lumc.sasc</groupId> - <artifactId>Shiva</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>org.testng</groupId> - <artifactId>testng</artifactId> - <version>6.8</version> - <scope>test</scope> - </dependency> - <dependency> - <groupId>org.scalatest</groupId> - <artifactId>scalatest_2.10</artifactId> - <version>2.2.1</version> - <scope>test</scope> - </dependency> - </dependencies> -</project> diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Basty.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Basty.scala deleted 
file mode 100644 index ecf4ccf901f8339db7da75c499bfae17a657eb16..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Basty.scala +++ /dev/null @@ -1,27 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.pipelines.gatk - -import nl.lumc.sasc.biopet.core.PipelineCommand -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.pipelines.basty.BastyTrait -import org.broadinstitute.gatk.queue.QScript - -/** - * Basty pipeline including GATK steps - * - * Created by pjvan_thof on 3/4/15. - */ -class Basty(val root: Configurable) extends QScript with BastyTrait { - qscript => - def this() = this(null) - - override def variantcallers = List("unifiedgenotyper") - - override lazy val shiva = new Shiva(qscript) -} - -object Basty extends PipelineCommand \ No newline at end of file diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala deleted file mode 100644 index 244fae67a31938f4eadaa5228c979bf4a4cd9011..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala +++ /dev/null @@ -1,135 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.pipelines.gatk - -import nl.lumc.sasc.biopet.core.PipelineCommand -import nl.lumc.sasc.biopet.extensions.gatk.broad._ -import nl.lumc.sasc.biopet.pipelines.shiva.ShivaTrait -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.queue.QScript - -/** - * Shiva inplementation with GATK steps - * - * Created by pjvan_thof on 2/26/15. - */ -class Shiva(val root: Configurable) extends QScript with ShivaTrait { - qscript => - def this() = this(null) - - /** Make variantcalling namespace, this with the gatk modes in there */ - override def makeVariantcalling(multisample: Boolean = false) = { - if (multisample) new ShivaVariantcalling(qscript) { - override def namePrefix = "multisample" - override def configNamespace = "shivavariantcalling" - override def configPath: List[String] = super.configPath ::: "multisample" :: Nil - } - else new ShivaVariantcalling(qscript) { - override def configNamespace = "shivavariantcalling" - } - } - - /** Makes a sample */ - override def makeSample(id: String) = new this.Sample(id) - - /** Class will generate sample jobs */ - class Sample(sampleId: String) extends super.Sample(sampleId) { - /** Makes a library */ - override def makeLibrary(id: String) = new this.Library(id) - - /** Class will generate library jobs */ - class Library(libId: String) extends super.Library(libId) { - - lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true) - lazy val useBaseRecalibration: Boolean = { - val c: Boolean = config("use_base_recalibration", default = true) - val br = new BaseRecalibrator(qscript) - if (c && br.knownSites.isEmpty) - logger.warn("No Known site found, skipping base recalibration, file: " + inputBam) - c && br.knownSites.nonEmpty - } - - override def summarySettings = super.summarySettings + - 
("use_indel_realigner" -> useIndelRealigner) + - ("use_base_recalibration" -> useBaseRecalibration) - - override def preProcessBam = if (useIndelRealigner && useBaseRecalibration) - bamFile.map(swapExt(libDir, _, ".bam", ".realign.baserecal.bam")) - else if (useIndelRealigner) bamFile.map(swapExt(libDir, _, ".bam", ".realign.bam")) - else if (useBaseRecalibration) bamFile.map(swapExt(libDir, _, ".bam", ".baserecal.bam")) - else bamFile - - override def addJobs(): Unit = { - super.addJobs() - if (useIndelRealigner && useBaseRecalibration) { - val file = addIndelRealign(bamFile.get, libDir, isIntermediate = true) - addBaseRecalibrator(file, libDir, libraries.size > 1) - } else if (useIndelRealigner) { - addIndelRealign(bamFile.get, libDir, libraries.size > 1) - } else if (useBaseRecalibration) { - addBaseRecalibrator(bamFile.get, libDir, libraries.size > 1) - } - } - } - - override def keepMergedFiles: Boolean = config("keep_merged_files", default = !useIndelRealigner) - - override def summarySettings = super.summarySettings + ("use_indel_realigner" -> useIndelRealigner) - - lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true) - - override def preProcessBam = if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) { - bamFile.map(swapExt(sampleDir, _, ".bam", ".realign.bam")) - } else bamFile - - override def addJobs(): Unit = { - super.addJobs() - - if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) { - addIndelRealign(bamFile.get, sampleDir, false) - } - } - } - - /** Adds indel realignment jobs */ - def addIndelRealign(inputBam: File, dir: File, isIntermediate: Boolean): File = { - val realignerTargetCreator = RealignerTargetCreator(this, inputBam, dir) - realignerTargetCreator.isIntermediate = true - add(realignerTargetCreator) - - val indelRealigner = IndelRealigner(this, inputBam, realignerTargetCreator.out, dir) - indelRealigner.isIntermediate = isIntermediate - add(indelRealigner) - 
- indelRealigner.o - } - - /** Adds base recalibration jobs */ - def addBaseRecalibrator(inputBam: File, dir: File, isIntermediate: Boolean): File = { - val baseRecalibrator = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal")) - - if (baseRecalibrator.knownSites.isEmpty) return inputBam - add(baseRecalibrator) - - if (config("use_analyze_covariates", default = false).asBoolean) { - val baseRecalibratorAfter = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.after")) - baseRecalibratorAfter.BQSR = baseRecalibrator.o - add(baseRecalibratorAfter) - - add(AnalyzeCovariates(this, baseRecalibrator.o, baseRecalibratorAfter.o, swapExt(dir, inputBam, ".bam", ".baserecal.pdf"))) - } - - val printReads = PrintReads(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.bam")) - printReads.BQSR = baseRecalibrator.o - printReads.isIntermediate = isIntermediate - add(printReads) - - printReads.o - } -} - -/** This object give a default main methods for this pipeline */ -object Shiva extends PipelineCommand \ No newline at end of file diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcalling.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcalling.scala deleted file mode 100644 index edbc633f3a03204f3d4e50ac14bc016134825252..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcalling.scala +++ /dev/null @@ -1,35 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.pipelines.gatk - -import nl.lumc.sasc.biopet.core.PipelineCommand -import nl.lumc.sasc.biopet.pipelines.gatk.variantcallers._ -import nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcallingTrait -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.queue.QScript - -/** - * ShivaVariantcalling with GATK variantcallers - * - * Created by pjvan_thof on 2/26/15. - */ -class ShivaVariantcalling(val root: Configurable) extends QScript with ShivaVariantcallingTrait { - qscript => - def this() = this(null) - - /** Will generate all available variantcallers */ - override def callersList = { - new HaplotypeCallerGvcf(this) :: - new HaplotypeCallerAllele(this) :: - new UnifiedGenotyperAllele(this) :: - new UnifiedGenotyper(this) :: - new HaplotypeCaller(this) :: - super.callersList - } -} - -/** object to add default main method to pipeline */ -object ShivaVariantcalling extends PipelineCommand \ No newline at end of file diff --git a/protected/biopet-gatk-pipelines/src/test/resources/log4j.properties b/protected/biopet-gatk-pipelines/src/test/resources/log4j.properties deleted file mode 100644 index 52fb824b0a8088346ed39f9de816309d0569ecf6..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-pipelines/src/test/resources/log4j.properties +++ /dev/null @@ -1,15 +0,0 @@ -# -# Due to the license issue with GATK, this part of Biopet can only be used inside the -# LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions -# on how to use this protected part of biopet or contact us at sasc@lumc.nl -# - -# Set root logger level to DEBUG and its only appender to A1. -log4j.rootLogger=ERROR, A1 - -# A1 is set to be a ConsoleAppender. -log4j.appender.A1=org.apache.log4j.ConsoleAppender - -# A1 uses PatternLayout. 
-log4j.appender.A1.layout=org.apache.log4j.PatternLayout -log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala b/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala deleted file mode 100644 index f4576e5a10d380e572891cae1160d665ebbe8746..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala +++ /dev/null @@ -1,147 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.pipelines.gatk - -import java.io.{ File, FileOutputStream } - -import com.google.common.io.Files -import nl.lumc.sasc.biopet.utils.config.Config -import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants -import nl.lumc.sasc.biopet.extensions.gatk.broad.{ HaplotypeCaller, UnifiedGenotyper } -import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter, VcfStats } -import nl.lumc.sasc.biopet.utils.ConfigUtils -import org.apache.commons.io.FileUtils -import org.broadinstitute.gatk.queue.QSettings -import org.scalatest.Matchers -import org.scalatest.testng.TestNGSuite -import org.testng.annotations.{ AfterClass, DataProvider, Test } - -import scala.collection.mutable.ListBuffer - -/** - * Class for testing ShivaVariantcalling - * - * Created by pjvan_thof on 3/2/15. 
- */ -class ShivaVariantcallingTest extends TestNGSuite with Matchers { - def initPipeline(map: Map[String, Any]): ShivaVariantcalling = { - new ShivaVariantcalling() { - override def configNamespace = "shivavariantcalling" - override def globalConfig = new Config(ConfigUtils.mergeMaps(map, ShivaVariantcallingTest.config)) - qSettings = new QSettings - qSettings.runName = "test" - } - } - - @DataProvider(name = "shivaVariantcallingOptions") - def shivaVariantcallingOptions = { - val bool = Array(true, false) - - (for ( - bams <- 0 to 2; - raw <- bool; - bcftools <- bool; - bcftools_singlesample <- bool; - haplotypeCallerGvcf <- bool; - haplotypeCallerAllele <- bool; - unifiedGenotyperAllele <- bool; - unifiedGenotyper <- bool; - haplotypeCaller <- bool - ) yield Array[Any](bams, raw, bcftools, bcftools_singlesample, unifiedGenotyper, haplotypeCaller, haplotypeCallerGvcf, haplotypeCallerAllele, unifiedGenotyperAllele) - ).toArray - } - - @Test(dataProvider = "shivaVariantcallingOptions") - def testShivaVariantcalling(bams: Int, - raw: Boolean, - bcftools: Boolean, - bcftools_singlesample: Boolean, - unifiedGenotyper: Boolean, - haplotypeCaller: Boolean, - haplotypeCallerGvcf: Boolean, - haplotypeCallerAllele: Boolean, - unifiedGenotyperAllele: Boolean) = { - val callers: ListBuffer[String] = ListBuffer() - if (raw) callers.append("raw") - if (bcftools) callers.append("bcftools") - if (bcftools_singlesample) callers.append("bcftools_singlesample") - if (unifiedGenotyper) callers.append("unifiedgenotyper") - if (haplotypeCallerGvcf) callers.append("haplotypecaller_gvcf") - if (haplotypeCallerAllele) callers.append("haplotypecaller_allele") - if (unifiedGenotyperAllele) callers.append("unifiedgenotyper_allele") - if (haplotypeCaller) callers.append("haplotypecaller") - val map = Map("variantcallers" -> callers.toList) - val pipeline = initPipeline(map) - - pipeline.inputBams = (for (n <- 1 to bams) yield n.toString -> ShivaVariantcallingTest.inputTouch("bam_" + n + 
".bam")).toMap - - val illegalArgumentException = pipeline.inputBams.isEmpty || - (!raw && !bcftools && - !haplotypeCaller && !unifiedGenotyper && - !haplotypeCallerGvcf && !haplotypeCallerAllele && !unifiedGenotyperAllele && - !bcftools_singlesample) - - if (illegalArgumentException) intercept[IllegalArgumentException] { - pipeline.script() - } - - if (!illegalArgumentException) { - pipeline.script() - - pipeline.functions.count(_.isInstanceOf[CombineVariants]) shouldBe 1 + (if (raw) 1 else 0) - //pipeline.functions.count(_.isInstanceOf[Bcftools]) shouldBe (if (bcftools) 1 else 0) - //FIXME: Can not check for bcftools because of piping - //pipeline.functions.count(_.isInstanceOf[MpileupToVcf]) shouldBe (if (raw) bams else 0) - pipeline.functions.count(_.isInstanceOf[VcfFilter]) shouldBe (if (raw) bams else 0) - pipeline.functions.count(_.isInstanceOf[HaplotypeCaller]) shouldBe (if (haplotypeCaller) 1 else 0) + - (if (haplotypeCallerAllele) 1 else 0) + (if (haplotypeCallerGvcf) bams else 0) - pipeline.functions.count(_.isInstanceOf[UnifiedGenotyper]) shouldBe (if (unifiedGenotyper) 1 else 0) + - (if (unifiedGenotyperAllele) 1 else 0) - pipeline.functions.count(_.isInstanceOf[VcfStats]) shouldBe (1 + callers.size) - } - } - - @AfterClass def removeTempOutputDir() = { - FileUtils.deleteDirectory(ShivaVariantcallingTest.outputDir) - } -} - -object ShivaVariantcallingTest { - val outputDir = Files.createTempDir() - new File(outputDir, "input").mkdirs() - def inputTouch(name: String): File = { - val file = new File(outputDir, "input" + File.separator + name).getAbsoluteFile - Files.touch(file) - file - } - - private def copyFile(name: String): Unit = { - val is = getClass.getResourceAsStream("/" + name) - val os = new FileOutputStream(new File(outputDir, name)) - org.apache.commons.io.IOUtils.copy(is, os) - os.close() - } - - copyFile("ref.fa") - copyFile("ref.dict") - copyFile("ref.fa.fai") - - val config = Map( - "name_prefix" -> "test", - "output_dir" -> outputDir, - 
"cache" -> true, - "dir" -> "test", - "vep_script" -> "test", - "reference_fasta" -> (outputDir + File.separator + "ref.fa"), - "gatk_jar" -> "test", - "samtools" -> Map("exe" -> "test"), - "bcftools" -> Map("exe" -> "test"), - "md5sum" -> Map("exe" -> "test"), - "bgzip" -> Map("exe" -> "test"), - "tabix" -> Map("exe" -> "test"), - "input_alleles" -> "test" - ) -} \ No newline at end of file diff --git a/protected/biopet-protected-package/pom.xml b/protected/biopet-protected-package/pom.xml deleted file mode 100644 index 03b88654a2ff63177615f1299a9e058420080ac4..0000000000000000000000000000000000000000 --- a/protected/biopet-protected-package/pom.xml +++ /dev/null @@ -1,92 +0,0 @@ -<!-- - - Due to the license issue with GATK, this part of Biopet can only be used inside the - LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - on how to use this protected part of biopet or contact us at sasc@lumc.nl - ---> -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - - <artifactId>BiopetProtectedPackage</artifactId> - <packaging>jar</packaging> - - <parent> - <groupId>nl.lumc.sasc</groupId> - <artifactId>BiopetGatk</artifactId> - <version>0.7.0-SNAPSHOT</version> - <relativePath>../</relativePath> - </parent> - - <inceptionYear>2014</inceptionYear> - <name>BiopetProtectedPackage</name> - - <properties> - <sting.shade.phase>package</sting.shade.phase> - <app.main.class>nl.lumc.sasc.biopet.BiopetExecutableProtected</app.main.class> - </properties> - - <dependencies> - <dependency> - <groupId>nl.lumc.sasc</groupId> - <artifactId>BiopetCore</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>nl.lumc.sasc</groupId> - <artifactId>BiopetPublicPackage</artifactId> - <version>${project.version}</version> - 
</dependency> - <dependency> - <groupId>nl.lumc.sasc</groupId> - <artifactId>BiopetGatkPipelines</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>nl.lumc.sasc</groupId> - <artifactId>Basty</artifactId> - <version>${project.version}</version> - </dependency> - </dependencies> - <build> - <plugins> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-shade-plugin</artifactId> - <version>2.4.1</version> - <configuration> - <!--suppress MavenModelInspection --> - <finalName>Biopet-${project.version}-${git.commit.id.abbrev}</finalName> - <transformers> - <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> - <manifestEntries> - <Main-Class>${app.main.class}</Main-Class> - <!--suppress MavenModelInspection --> - <X-Compile-Source-JDK>${maven.compile.source}</X-Compile-Source-JDK> - <!--suppress MavenModelInspection --> - <X-Compile-Target-JDK>${maven.compile.target}</X-Compile-Target-JDK> - </manifestEntries> - </transformer> - </transformers> - <filters> - <filter> - <artifact>nl.lumc.sasc:BiopetFramework</artifact> - <excludes> - <exclude>nl/lumc/sasc/biopet/License.txt</exclude> - </excludes> - </filter> - </filters> - </configuration> - <executions> - <execution> - <phase>package</phase> - <goals> - <goal>shade</goal> - </goals> - </execution> - </executions> - </plugin> - </plugins> - </build> -</project> diff --git a/protected/biopet-protected-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableProtected.scala b/protected/biopet-protected-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableProtected.scala deleted file mode 100644 index 9155e7dbacf4fd624694ac06a1ddf24c69071afe..0000000000000000000000000000000000000000 --- a/protected/biopet-protected-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableProtected.scala +++ /dev/null @@ -1,17 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used 
inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet - -import nl.lumc.sasc.biopet.utils.{ BiopetExecutable, MainCommand } - -object BiopetExecutableProtected extends BiopetExecutable { - def pipelines: List[MainCommand] = BiopetExecutablePublic.publicPipelines ::: List( - nl.lumc.sasc.biopet.pipelines.gatk.Shiva, - nl.lumc.sasc.biopet.pipelines.gatk.ShivaVariantcalling, - nl.lumc.sasc.biopet.pipelines.gatk.Basty) - - def tools = BiopetExecutablePublic.tools -} \ No newline at end of file diff --git a/protected/biopet-protected-package/src/test/resources/log4j.properties b/protected/biopet-protected-package/src/test/resources/log4j.properties deleted file mode 100644 index 52fb824b0a8088346ed39f9de816309d0569ecf6..0000000000000000000000000000000000000000 --- a/protected/biopet-protected-package/src/test/resources/log4j.properties +++ /dev/null @@ -1,15 +0,0 @@ -# -# Due to the license issue with GATK, this part of Biopet can only be used inside the -# LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions -# on how to use this protected part of biopet or contact us at sasc@lumc.nl -# - -# Set root logger level to DEBUG and its only appender to A1. -log4j.rootLogger=ERROR, A1 - -# A1 is set to be a ConsoleAppender. -log4j.appender.A1=org.apache.log4j.ConsoleAppender - -# A1 uses PatternLayout. 
-log4j.appender.A1.layout=org.apache.log4j.PatternLayout -log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/protected/log4j.properties b/protected/log4j.properties deleted file mode 100644 index 52fb824b0a8088346ed39f9de816309d0569ecf6..0000000000000000000000000000000000000000 --- a/protected/log4j.properties +++ /dev/null @@ -1,15 +0,0 @@ -# -# Due to the license issue with GATK, this part of Biopet can only be used inside the -# LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions -# on how to use this protected part of biopet or contact us at sasc@lumc.nl -# - -# Set root logger level to DEBUG and its only appender to A1. -log4j.rootLogger=ERROR, A1 - -# A1 is set to be a ConsoleAppender. -log4j.appender.A1=org.apache.log4j.ConsoleAppender - -# A1 uses PatternLayout. -log4j.appender.A1.layout=org.apache.log4j.PatternLayout -log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/protected/pom.xml b/protected/pom.xml deleted file mode 100644 index fa84a64573890a57aa3cafca124da515f19053de..0000000000000000000000000000000000000000 --- a/protected/pom.xml +++ /dev/null @@ -1,25 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!-- - - Due to the license issue with GATK, this part of Biopet can only be used inside the - LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - on how to use this protected part of biopet or contact us at sasc@lumc.nl - ---> -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - <parent> - <groupId>nl.lumc.sasc</groupId> - <artifactId>BiopetRoot</artifactId> - <version>0.7.0-SNAPSHOT</version> - <relativePath>../</relativePath> - </parent> - <artifactId>BiopetGatk</artifactId> - <name>BiopetGatk</name> - <packaging>pom</packaging> - <modules> - <module>biopet-gatk-extensions</module> - <module>biopet-gatk-pipelines</module> - <module>biopet-protected-package</module> - </modules> -</project> \ No newline at end of file diff --git a/protected/src/src/test/resources/log4j.properties b/protected/src/src/test/resources/log4j.properties deleted file mode 100644 index 52fb824b0a8088346ed39f9de816309d0569ecf6..0000000000000000000000000000000000000000 --- a/protected/src/src/test/resources/log4j.properties +++ /dev/null @@ -1,15 +0,0 @@ -# -# Due to the license issue with GATK, this part of Biopet can only be used inside the -# LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions -# on how to use this protected part of biopet or contact us at sasc@lumc.nl -# - -# Set root logger level to DEBUG and its only appender to A1. -log4j.rootLogger=ERROR, A1 - -# A1 is set to be a ConsoleAppender. -log4j.appender.A1=org.apache.log4j.ConsoleAppender - -# A1 uses PatternLayout. 
-log4j.appender.A1.layout=org.apache.log4j.PatternLayout -log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/public/.gitattributes b/public/.gitattributes deleted file mode 100644 index 492710bca31fa309d678c9c5f1047a5ed232fe16..0000000000000000000000000000000000000000 --- a/public/.gitattributes +++ /dev/null @@ -1,2 +0,0 @@ -*.bam binary -*.bam.bai binary diff --git a/public/.gitignore b/public/.gitignore deleted file mode 100644 index eb9e98910e679749a1ae2334df8842db3379cc83..0000000000000000000000000000000000000000 --- a/public/.gitignore +++ /dev/null @@ -1,13 +0,0 @@ -# Project-related -dependency-reduced-pom.xml -git.properties - -# gedit -*~ -# Vim -*.swp -# IntelliJ -.idea/workspace.xml -/target/ -/public/target/ -/protected/target/ diff --git a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala b/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala deleted file mode 100644 index 8476d1bbc56270b2da0d1df6eae20e19a6886bcc..0000000000000000000000000000000000000000 --- a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala +++ /dev/null @@ -1,31 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. 
- */ -package nl.lumc.sasc.biopet.pipelines.basty - -import nl.lumc.sasc.biopet.core.PipelineCommand -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.queue.QScript - -/** - * Basty implementation without GATK parts - * - * Created by pjvan_thof on 3/4/15. - */ -class Basty(val root: Configurable) extends QScript with BastyTrait { - def this() = this(null) -} - -object Basty extends PipelineCommand \ No newline at end of file diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala deleted file mode 100644 index ceceed5f64ba51a75b098e4cd1b18beaa4f1894d..0000000000000000000000000000000000000000 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. 
- */ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File - -import nl.lumc.sasc.biopet.core.{ Reference, BiopetJavaCommandLineFunction } -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Input, Output } - -class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction with Reference { - - javaMainClass = classOf[org.broadinstitute.gatk.tools.CatVariants].getName - - @Input(required = true) - var inputFiles: List[File] = Nil - - @Output(required = true) - var outputFile: File = null - - @Input - var reference: File = null - - var assumeSorted = false - - override def beforeGraph(): Unit = { - super.beforeGraph() - if (reference == null) reference = referenceFasta() - } - - override def cmdLine = super.cmdLine + - repeat("-V", inputFiles) + - required("-out", outputFile) + - required("-R", reference) + - conditional(assumeSorted, "--assumeSorted") -} - -object CatVariants { - def apply(root: Configurable, input: List[File], output: File): CatVariants = { - val cv = new CatVariants(root) - cv.inputFiles = input - cv.outputFile = output - cv - } -} \ No newline at end of file diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala deleted file mode 100644 index cc230a6ad48e50f782f773a54d1537a72ad85e2d..0000000000000000000000000000000000000000 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala +++ /dev/null @@ -1,73 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. 
- * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Input, Output } - -/** - * Extension for CombineVariants from GATK - * - * Created by pjvan_thof on 2/26/15. - */ -class CombineVariants(val root: Configurable) extends Gatk { - val analysisType = "CombineVariants" - - @Input(doc = "", required = true) - var inputFiles: List[File] = Nil - - @Output(doc = "", required = true) - var outputFile: File = null - - var setKey: String = null - var rodPriorityList: String = null - var minimumN: Int = config("minimumN", default = 1) - var genotypeMergeOptions: Option[String] = config("genotypeMergeOptions") - var excludeNonVariants: Boolean = false - - var inputMap: Map[File, String] = Map() - - def addInput(file: File, name: String): Unit = { - inputFiles :+= file - inputMap += file -> name - } - - override def beforeGraph(): Unit = { - super.beforeGraph() - if (outputFile.getName.endsWith(".vcf.gz")) outputFiles :+= new File(outputFile.getAbsolutePath + ".tbi") - genotypeMergeOptions match { - case Some("UNIQUIFY") | Some("PRIORITIZE") | Some("UNSORTED") | Some("REQUIRE_UNIQUE") | None => - case _ => throw new IllegalArgumentException("Wrong option for genotypeMergeOptions") - } - deps :::= inputFiles.filter(_.getName.endsWith("vcf.gz")).map(x => new File(x.getAbsolutePath + ".tbi")) - deps = deps.distinct - } - - override def cmdLine = super.cmdLine + - (for (file <- inputFiles) yield { - inputMap.get(file) match { 
- case Some(name) => required("-V:" + name, file) - case _ => required("-V", file) - } - }).mkString + - required("-o", outputFile) + - optional("--setKey", setKey) + - optional("--rod_priority_list", rodPriorityList) + - optional("-genotypeMergeOptions", genotypeMergeOptions) + - conditional(excludeNonVariants, "--excludeNonVariants") -} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/Gatk.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/Gatk.scala deleted file mode 100644 index 33522732f42599390238d294c8e6dfb1297f3829..0000000000000000000000000000000000000000 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/Gatk.scala +++ /dev/null @@ -1,74 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File - -import nl.lumc.sasc.biopet.core.{ Version, BiopetJavaCommandLineFunction, Reference } -import org.broadinstitute.gatk.utils.commandline.Input - -/** - * General extension for GATK module - * - * Created by pjvan_thof on 2/26/15. 
- */ -abstract class Gatk extends BiopetJavaCommandLineFunction with Reference with Version { - override def subPath = "gatk" :: super.subPath - - jarFile = config("gatk_jar") - - val analysisType: String - - override def defaultCoreMemory = 3.0 - - @Input(required = true) - var reference: File = null - - @Input(required = false) - var gatkKey: Option[File] = config("gatk_key") - - @Input(required = false) - var intervals: List[File] = config("intervals", default = Nil) - - @Input(required = false) - var excludeIntervals: List[File] = config("exclude_intervals", default = Nil) - - @Input(required = false) - var pedigree: List[File] = config("pedigree", default = Nil) - - var et: Option[String] = config("et") - - def versionRegex = """(.*)""".r - override def versionExitcode = List(0, 1) - def versionCommand = executable + " -jar " + jarFile + " -version" - - override def getVersion = super.getVersion.collect { case version => "Gatk " + version } - override def dictRequired = true - - override def beforeGraph(): Unit = { - super.beforeGraph() - if (reference == null) reference = referenceFasta() - } - - override def cmdLine = super.cmdLine + - required("-T", analysisType) + - required("-R", reference) + - optional("-K", gatkKey) + - optional("-et", et) + - repeat("-L", intervals) + - repeat("-XL", excludeIntervals) + - repeat("-ped", pedigree) -} \ No newline at end of file diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala deleted file mode 100644 index 519cbfad6a8db14cf4812fb0005dc7e0e0e6aa65..0000000000000000000000000000000000000000 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala +++ /dev/null @@ -1,94 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. 
It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File - -import nl.lumc.sasc.biopet.core.summary.Summarizable -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Input, Output } -import org.broadinstitute.gatk.utils.report.{ GATKReportTable, GATKReport } - -/** - * Extension for CombineVariants from GATK - * - * Created by pjvan_thof on 2/26/15. 
- */ -class GenotypeConcordance(val root: Configurable) extends Gatk with Summarizable { - val analysisType = "GenotypeConcordance" - - @Input(required = true) - var evalFile: File = null - - @Input(required = true) - var compFile: File = null - - @Output(required = true) - var outputFile: File = null - - var moltenize = true - - def summaryFiles = Map("output" -> outputFile) - - def summaryStats = { - val report = new GATKReport(outputFile) - val compProportions = report.getTable("GenotypeConcordance_CompProportions") - val counts = report.getTable("GenotypeConcordance_Counts") - val evalProportions = report.getTable("GenotypeConcordance_EvalProportions") - val genotypeSummary = report.getTable("GenotypeConcordance_Summary") - val siteSummary = report.getTable("SiteConcordance_Summary") - - val samples = for (i <- 0 until genotypeSummary.getNumRows) yield genotypeSummary.get(i, "Sample").toString - - def getMap(table: GATKReportTable, column: String) = samples.distinct.map(sample => sample -> { - (for (i <- 0 until table.getNumRows if table.get(i, "Sample") == sample) yield s"${table.get(i, "Eval_Genotype")}__${table.get(i, "Comp_Genotype")}" -> table.get(i, column)).toMap - }).toMap - - Map( - "compProportions" -> getMap(compProportions, "Proportion"), - "counts" -> getMap(counts, "Count"), - "evalProportions" -> getMap(evalProportions, "Proportion"), - "genotypeSummary" -> samples.distinct.map(sample => { - val i = samples.indexOf(sample) - sample -> Map( - "Non-Reference_Discrepancy" -> genotypeSummary.get(i, "Non-Reference_Discrepancy"), - "Non-Reference_Sensitivity" -> genotypeSummary.get(i, "Non-Reference_Sensitivity"), - "Overall_Genotype_Concordance" -> genotypeSummary.get(i, "Overall_Genotype_Concordance") - ) - }).toMap, - "siteSummary" -> Map( - "ALLELES_MATCH" -> siteSummary.get(0, "ALLELES_MATCH"), - "EVAL_SUPERSET_TRUTH" -> siteSummary.get(0, "EVAL_SUPERSET_TRUTH"), - "EVAL_SUBSET_TRUTH" -> siteSummary.get(0, "EVAL_SUBSET_TRUTH"), - 
"ALLELES_DO_NOT_MATCH" -> siteSummary.get(0, "ALLELES_DO_NOT_MATCH"), - "EVAL_ONLY" -> siteSummary.get(0, "EVAL_ONLY"), - "TRUTH_ONLY" -> siteSummary.get(0, "TRUTH_ONLY") - ) - ) - } - - override def beforeGraph(): Unit = { - super.beforeGraph() - deps :::= (evalFile :: compFile :: Nil).filter(_.getName.endsWith("vcf.gz")).map(x => new File(x.getAbsolutePath + ".tbi")) - deps = deps.distinct - } - - override def cmdLine = super.cmdLine + - required("--eval", evalFile) + - required("--comp", compFile) + - required("-o", outputFile) + - conditional(moltenize, "--moltenize") -} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala deleted file mode 100644 index 65abc60985a05edcd07aee8646f5b0c04ca839ed..0000000000000000000000000000000000000000 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala +++ /dev/null @@ -1,62 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. 
- */ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Input, Output } - -/** - * Extension for CombineVariants from GATK - * - * Created by pjvan_thof on 2/26/15. - */ -class SelectVariants(val root: Configurable) extends Gatk { - val analysisType = "SelectVariants" - - @Input(doc = "", required = true) - var inputFiles: List[File] = Nil - - @Output(doc = "", required = true) - var outputFile: File = null - - var excludeNonVariants: Boolean = false - - var inputMap: Map[File, String] = Map() - - def addInput(file: File, name: String): Unit = { - inputFiles :+= file - inputMap += file -> name - } - - override def beforeGraph(): Unit = { - super.beforeGraph() - if (outputFile.getName.endsWith(".vcf.gz")) outputFiles :+= new File(outputFile.getAbsolutePath + ".tbi") - deps :::= inputFiles.filter(_.getName.endsWith("vcf.gz")).map(x => new File(x.getAbsolutePath + ".tbi")) - deps = deps.distinct - } - - override def cmdLine = super.cmdLine + - (for (file <- inputFiles) yield { - inputMap.get(file) match { - case Some(name) => required("-V:" + name, file) - case _ => required("-V", file) - } - }).mkString + - required("-o", outputFile) + - conditional(excludeNonVariants, "--excludeNonVariants") -} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala deleted file mode 100644 index f7d4b3313582c72893a02aae7ebbfa1b833d903d..0000000000000000000000000000000000000000 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala +++ /dev/null @@ -1,62 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. 
But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.extensions.gatk.broad - -import nl.lumc.sasc.biopet.core._ -import org.broadinstitute.gatk.engine.phonehome.GATKRunReport -import org.broadinstitute.gatk.queue.extensions.gatk.CommandLineGATK - -trait GatkGeneral extends CommandLineGATK with CommandLineResources with Reference with Version { - var executable: String = config("java", default = "java", namespace = "java", freeVar = false) - - override def subPath = "gatk" :: super.subPath - - jarFile = config("gatk_jar") - - reference_sequence = referenceFasta() - - override def defaultCoreMemory = 4.0 - override def faiRequired = true - override def dictRequired = true - - if (config.contains("intervals")) intervals = config("intervals").asFileList - if (config.contains("exclude_intervals")) excludeIntervals = config("exclude_intervals").asFileList - - Option(config("et").value) match { - case Some("NO_ET") => et = GATKRunReport.PhoneHomeOption.NO_ET - case Some("AWS") => et = GATKRunReport.PhoneHomeOption.AWS - case Some("STDOUT") => et = GATKRunReport.PhoneHomeOption.STDOUT - case Some(x) => throw new IllegalArgumentException(s"Unknown et option for gatk: $x") - 
case _ => - } - - if (config.contains("gatk_key")) gatk_key = config("gatk_key") - if (config.contains("pedigree")) pedigree = config("pedigree") - - def versionRegex = """(.*)""".r - override def versionExitcode = List(0, 1) - def versionCommand = "java" + " -jar " + jarFile + " -version" - - override def getVersion = { - BiopetCommandLineFunction.preProcessExecutable(executable).path.foreach(executable = _) - super.getVersion.collect { case v => "Gatk " + v } - } -} diff --git a/public/flexiprep/src/test/resources/v0101.fq_fastqc/fastqc_data.txt b/public/flexiprep/src/test/resources/v0101.fq_fastqc/fastqc_data.txt deleted file mode 100644 index 74938a52b7d505b1185b1962ffe7234ddb304a52..0000000000000000000000000000000000000000 --- a/public/flexiprep/src/test/resources/v0101.fq_fastqc/fastqc_data.txt +++ /dev/null @@ -1,854 +0,0 @@ -==== - Biopet is built on top of GATK Queue for building bioinformatic - pipelines. It is mainly intended to support LUMC SHARK cluster which is running - SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - should also be able to execute Biopet tools and pipelines. - - Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - - Contact us at: sasc@lumc.nl - - A dual licensing mode is applied. The source code within this project that are - not part of GATK Queue is freely available for non-commercial use under an AGPL - license; For commercial users or users who do not want to follow the AGPL - license, please contact us to obtain a separate license. 
-==== - -##FastQC 0.10.1 ->>Basic Statistics pass -#Measure Value -Filename ct_r1.fq -File type Conventional base calls -Encoding Sanger / Illumina 1.9 -Total Sequences 1000 -Filtered Sequences 0 -Sequence length 100 -%GC 53 ->>END_MODULE ->>Per base sequence quality fail -#Base Mean Median Lower Quartile Upper Quartile 10th Percentile 90th Percentile -1 32.244 33.0 31.0 34.0 30.0 34.0 -2 32.589 34.0 31.0 34.0 31.0 34.0 -3 32.814 34.0 31.0 34.0 31.0 34.0 -4 36.231 37.0 35.0 37.0 35.0 37.0 -5 35.907 37.0 35.0 37.0 35.0 37.0 -6 35.934 37.0 35.0 37.0 35.0 37.0 -7 35.783 37.0 35.0 37.0 35.0 37.0 -8 36.008 37.0 35.0 37.0 35.0 37.0 -9 37.706 39.0 37.0 39.0 35.0 39.0 -10-14 37.857600000000005 39.2 37.2 39.4 34.8 39.4 -15-19 38.9788 40.2 38.0 41.0 35.0 41.0 -20-24 38.8246 40.0 38.0 41.0 34.8 41.0 -25-29 38.589600000000004 40.0 38.0 41.0 34.4 41.0 -30-34 38.3568 40.0 38.0 41.0 33.8 41.0 -35-39 38.1592 40.0 37.4 41.0 33.6 41.0 -40-44 37.4808 39.8 36.0 41.0 32.6 41.0 -45-49 36.9478 39.0 35.0 40.8 31.2 41.0 -50-54 35.845600000000005 37.8 34.6 40.0 29.4 41.0 -55-59 34.739 36.6 33.6 40.0 27.4 41.0 -60-64 34.1336 35.4 33.4 38.6 27.2 40.2 -65-69 32.7464 35.0 32.6 37.2 24.6 39.6 -70-74 29.3478 34.0 29.6 35.6 2.0 38.6 -75-79 27.4908 33.2 26.4 35.0 2.0 36.6 -80-84 25.893000000000008 33.0 21.8 35.0 2.0 35.4 -85-89 25.031799999999997 32.4 16.2 34.6 2.0 35.0 -90-94 23.9446 31.4 6.4 34.0 2.0 35.0 -95-99 22.9358 30.4 2.0 34.0 2.0 35.0 -100 21.984 30.0 2.0 34.0 2.0 35.0 ->>END_MODULE ->>Per sequence quality scores pass -#Quality Count -11 1.0 -12 4.0 -13 3.0 -14 1.0 -15 4.0 -16 4.0 -17 6.0 -18 7.0 -19 4.0 -20 2.0 -21 7.0 -22 9.0 -23 9.0 -24 17.0 -25 23.0 -26 30.0 -27 52.0 -28 39.0 -29 28.0 -30 23.0 -31 33.0 -32 43.0 -33 47.0 -34 74.0 -35 88.0 -36 148.0 -37 202.0 -38 89.0 -39 3.0 ->>END_MODULE ->>Per base sequence content fail -#Base G A T C -1 52.35707121364093 17.251755265797392 11.735205616850552 18.655967903711137 -2 34.300000000000004 11.1 24.8 29.799999999999997 -3 41.0 6.5 
20.200000000000003 32.300000000000004 -4 37.5 8.7 26.0 27.800000000000004 -5 35.4 12.4 31.8 20.4 -6 57.3 11.1 1.6 30.0 -7 20.9 24.7 32.6 21.8 -8 20.0 27.200000000000003 30.0 22.8 -9 24.5 21.5 27.800000000000004 26.200000000000003 -10-14 25.22 23.28 26.26 25.240000000000002 -15-19 26.44 21.34 26.1 26.119999999999997 -20-24 25.240000000000002 22.1 24.6 28.060000000000002 -25-29 24.62 22.06 25.119999999999997 28.199999999999996 -30-34 26.240000000000002 21.44 24.279999999999998 28.04 -35-39 24.8 22.439999999999998 24.34 28.42 -40-44 25.8 22.84 23.9 27.46 -45-49 26.26 22.64 23.66 27.439999999999998 -50-54 26.72 22.58 23.18 27.52 -55-59 25.019999999999996 22.58 24.38 28.02 -60-64 26.251501802162597 22.00640768922707 23.28794553464157 28.454144973968766 -65-69 25.683829444891394 23.873692679002414 23.049074818986323 27.39340305711987 -70-74 25.554134697357206 25.44757033248082 21.717817561807333 27.28047740835465 -75-79 25.818501428257523 23.643155350472423 23.071852340145025 27.466490881125026 -80-84 26.973532796317606 23.95857307249712 21.74913693901036 27.318757192174914 -85-89 25.452016689847014 24.849327770050998 22.624014835419565 27.07464070468243 -90-94 24.547101449275363 22.35054347826087 24.139492753623188 28.962862318840582 -95-99 25.318837549655026 24.231653773782146 23.186284758519758 27.263223918043067 -100 24.0 26.0 21.9 28.1 ->>END_MODULE ->>Per base GC content fail -#Base %GC -1 71.01303911735206 -2 64.1 -3 73.3 -4 65.3 -5 55.800000000000004 -6 87.3 -7 42.699999999999996 -8 42.8 -9 50.7 -10-14 50.46000000000001 -15-19 52.559999999999995 -20-24 53.300000000000004 -25-29 52.82 -30-34 54.279999999999994 -35-39 53.22 -40-44 53.26 -45-49 53.7 -50-54 54.24 -55-59 53.04 -60-64 54.70564677613135 -65-69 53.07723250201126 -70-74 52.834612105711855 -75-79 53.28499230938255 -80-84 54.29228998849251 -85-89 52.526657394529444 -90-94 53.509963768115945 -95-99 52.5820614676981 -100 52.1 ->>END_MODULE ->>Per sequence GC content fail -#GC Content Count -0 0.0 -1 0.0 -2 
0.0 -3 0.0 -4 0.0 -5 0.0 -6 0.0 -7 0.0 -8 0.0 -9 0.0 -10 0.0 -11 0.0 -12 0.0 -13 0.0 -14 0.0 -15 0.0 -16 0.0 -17 0.0 -18 0.0 -19 0.0 -20 0.0 -21 0.0 -22 0.0 -23 0.5 -24 0.5 -25 0.5 -26 1.0 -27 1.5 -28 2.0 -29 3.5 -30 5.5 -31 6.0 -32 6.5 -33 6.0 -34 4.5 -35 6.0 -36 11.0 -37 17.0 -38 21.0 -39 16.5 -40 15.0 -41 24.0 -42 28.5 -43 33.0 -44 35.5 -45 32.5 -46 32.0 -47 32.0 -48 29.5 -49 30.5 -50 30.0 -51 29.5 -52 30.0 -53 27.5 -54 26.5 -55 27.0 -56 29.5 -57 34.0 -58 36.0 -59 36.0 -60 37.0 -61 31.5 -62 24.0 -63 22.5 -64 27.0 -65 28.5 -66 20.5 -67 15.0 -68 17.0 -69 13.5 -70 8.0 -71 7.0 -72 9.0 -73 8.0 -74 5.5 -75 4.5 -76 2.0 -77 2.0 -78 3.0 -79 2.0 -80 1.5 -81 1.0 -82 0.0 -83 0.5 -84 1.0 -85 0.5 -86 0.0 -87 0.0 -88 0.0 -89 0.0 -90 0.0 -91 0.0 -92 0.0 -93 0.0 -94 0.0 -95 0.0 -96 0.0 -97 0.0 -98 0.0 -99 0.0 -100 0.0 ->>END_MODULE ->>Per base N content warn -#Base N-Count -1 0.3 -2 0.0 -3 0.0 -4 0.0 -5 0.0 -6 0.0 -7 0.0 -8 0.0 -9 0.0 -10-14 0.0 -15-19 0.0 -20-24 0.0 -25-29 0.0 -30-34 0.0 -35-39 0.0 -40-44 0.0 -45-49 0.0 -50-54 0.0 -55-59 0.0 -60-64 0.12 -65-69 0.5599999999999999 -70-74 6.16 -75-79 8.98 -80-84 13.100000000000001 -85-89 13.719999999999999 -90-94 11.68 -95-99 4.34 -100 0.0 ->>END_MODULE ->>Sequence Length Distribution pass -#Length Count -100 1000.0 ->>END_MODULE ->>Sequence Duplication Levels pass -#Total Duplicate Percentage 3.4 -#Duplication Level Relative count -1 100.0 -2 0.4140786749482402 -3 0.0 -4 0.0 -5 0.0 -6 0.0 -7 0.0 -8 0.0 -9 0.0 -10++ 0.2070393374741201 ->>END_MODULE ->>Overrepresented sequences fail -#Sequence Count Percentage Possible Source -AGATCGGAAGAGCACACGTCTGAACTCCAGTCACTTCCAAGATCTCGTAT 14 1.4000000000000001 TruSeq Adapter, Index 1 (97% over 36bp) -GATCGGAAGAGCACACGTCTGAACTCCAGTCACTTCCAAGATCTCGTATG 12 1.2 TruSeq Adapter, Index 1 (97% over 36bp) -AGGGGGAATGATGGTTGTCTTTGGATATACTACAGCGATGGCTATTGAGG 2 0.2 No Hit -GGCTTGTTTTATTTTAATGGCTGATCTATGTAATCACAGAGGCCAGTATG 2 0.2 No Hit -GTGGGGTGGTGTTTGTGGGGGACTTCATCATCTCAGGCTTCCCAGGGTCC 2 0.2 No Hit 
-CGGAAGAGCACACGTCTGAACTCCAGTCACTTCCAAGATCTCGTATGCCG 2 0.2 TruSeq Adapter, Index 1 (96% over 33bp) ->>END_MODULE ->>Kmer Content fail -#Sequence Count Obs/Exp Overall Obs/Exp Max Max Obs/Exp Position -AAAAA 385 7.3597403 68.038994 65-69 -AGATC 435 5.4375157 23.135067 1 -GAAGA 375 5.258809 32.443344 6 -GGAAG 420 5.044668 33.345257 5 -TCCAG 475 4.8355613 14.131038 2 -AAGAG 320 4.487517 25.954676 7 -CCAGG 475 4.4180827 17.21471 3 -GAGCA 380 4.3399205 21.1377 9 -AGCAC 395 4.2895336 15.0741825 7 -CTCCA 415 4.0171337 12.105032 95-96 -AGAGC 340 3.883087 21.137697 8 -TTTTT 280 3.8749053 8.964593 10-14 -CTTCT 370 3.8646336 11.598914 55-59 -CTGAA 305 3.812511 13.130004 90-94 -CGGAA 320 3.65467 26.422123 5 -ACCAG 335 3.6379597 10.049457 7 -TCTGA 310 3.6325634 12.308498 90-94 -CACAC 340 3.5108058 14.806036 85-89 -ATCGG 325 3.4795394 24.768969 3 -TCGGA 320 3.426008 19.815174 3 -GATCG 320 3.426008 19.815174 1 -CGTCT 355 3.387832 11.578538 85-89 -CTGCT 355 3.387832 17.662533 3 -GCACA 310 3.3664696 15.0741825 8 -TCTTC 320 3.3423858 7.7326093 50-54 -CAGCA 305 3.3121717 10.049455 6 -GAACT 260 3.2500093 13.130004 90-94 -GTCTG 320 3.2116532 12.65067 90-94 -CAGGA 280 3.197836 15.8532715 3 -AACTC 265 3.1497202 23.781752 95-96 -TGAAC 250 3.125009 13.130004 90-94 -CCAGC 350 3.0954454 6.6359653 95-96 -AGTCA 240 3.0000086 10.41078 25-29 -CACCA 290 2.9945107 6.079907 70-74 -TGCTG 295 2.960743 9.2877 2 -CAGAT 230 2.875008 11.040063 70-74 -CTTCC 315 2.8583732 10.916445 30-34 -CACGT 280 2.8504362 12.351324 85-89 -CAGGG 290 2.8367646 22.630535 9 -ACACG 260 2.8234906 13.175687 85-89 -TTCCA 250 2.7855206 9.279795 30-34 -TTCTT 230 2.765239 6.6755276 50-54 -AGCAG 240 2.7410026 15.853272 2 -TTCTG 240 2.6363494 10.165324 55-59 -ACTCC 270 2.6135564 14.526036 95-96 -GCCAG 280 2.6043434 8.607355 1 -ACGTC 255 2.595933 10.105629 85-89 -GATCT 220 2.5779483 8.675031 40-44 -TCTGC 265 2.5289452 13.2469015 2 -AAGAT 160 2.4557784 12.783248 35-39 -ATCTC 220 2.4512577 9.279794 40-44 -CAGTC 240 2.4432309 8.554544 
90-94 -TCCAA 205 2.4365761 10.999062 7 -CTTTT 200 2.4045558 16.688818 6 -TTCCT 230 2.40234 9.665762 7 -CCAGT 235 2.3923304 9.4206915 25-29 -TTTCT 195 2.3444414 16.688818 8 -CTGGG 255 2.3383298 6.004135 80-84 -TGCTT 210 2.3068056 10.165323 4 -TCTTT 190 2.284328 5.5629396 15-19 -TTTTC 190 2.2843277 11.125878 7 -GGGGG 255 2.2468696 16.307867 2 -AGGAA 160 2.2437584 19.466007 5 -GTCAC 220 2.2396283 10.184532 95-96 -TCACT 200 2.2284167 8.360176 95-96 -CACTT 200 2.2284167 10.3108835 30-34 -GAAAA 135 2.2103586 10.606119 60-64 -ACTTC 195 2.172706 9.279794 30-34 -TTGAA 150 2.1582448 11.9834385 60-64 -CTCCT 235 2.1324375 16.794533 4 -TCCTC 235 2.1324372 8.397265 5 -ATCTT 165 2.11616 7.1210704 10-14 -GGGGA 205 2.1089406 14.2801 3 -ACACA 165 2.092039 11.7331705 8 -TGCAG 195 2.0877237 9.907587 5 -GACCA 190 2.0633202 10.049455 6 -AGGGG 200 2.057503 9.520067 1 -CCTCC 260 2.049668 14.590484 5 -AGGAG 170 2.0418897 5.557543 2 -TCCTT 195 2.0367663 14.498643 4 -GTCTT 185 2.032186 15.247986 7 -GCTGG 220 2.0173824 8.485845 1 -CCAGA 185 2.0090222 5.3284492 70-74 -CCTGG 230 2.0054333 8.068818 3 -GCAGG 205 2.005299 9.052214 3 -GGACC 215 1.9997637 8.607355 5 -TTCAT 155 1.987908 5.934226 2 -CCTTT 190 1.9845415 14.498643 5 -TTTCC 190 1.9845415 5.799457 15-19 -TGGCA 185 1.980661 14.861383 2 -TCTTG 180 1.977262 10.165323 5 -CCAAG 180 1.9547247 9.044511 35-39 -CTTCA 175 1.9498644 10.310883 6 -CAAGA 145 1.933477 12.339583 35-39 -CTGGA 180 1.9271295 9.907587 6 -GGCTG 210 1.9256833 16.97169 2 -AATGA 125 1.918577 7.677627 95-96 -TGAAA 125 1.918577 15.623971 60-64 -GCTTC 200 1.9086379 13.2469015 2 -GTCCA 185 1.8833237 14.131036 1 -AGAAA 115 1.882898 7.5757995 7 -TGGGG 195 1.8805519 13.386638 1 -TTCTC 180 1.880092 5.799457 25-29 -CTTGA 160 1.8748715 8.675031 60-64 -ACAAA 120 1.8682072 5.762797 40-44 -TCTCG 195 1.8609219 8.831266 5 -GGGAC 190 1.8585701 9.052216 5 -TGAGG 165 1.8578365 5.209824 2 -TGAAG 140 1.8404517 6.082693 2 -CATCT 165 1.8384434 5.155441 4 -CACTG 180 1.8324232 9.4206915 6 -CTGCA 180 
1.8324231 5.3465896 90-94 -GCTGC 210 1.8310483 8.068819 1 -GCAGA 160 1.8273348 10.568848 3 -CCTTC 200 1.8148402 8.397265 9 -AGGGA 150 1.8016673 6.0081544 95-96 -TTTCA 140 1.7955297 7.1210704 15-19 -CACAG 165 1.7918309 5.432139 95-96 -AAACA 115 1.7903653 7.6389136 70-74 -ATTTT 120 1.7715117 13.661307 6 -TTTTG 140 1.7701824 17.551357 7 -GGGGC 210 1.7594293 11.629828 3 -GATTT 130 1.7534488 12.481857 6 -CAAAT 120 1.7513192 6.7527947 50-54 -GAGGG 170 1.7488776 9.520067 1 -GAAGG 145 1.7416117 6.0081544 95-96 -CATTT 135 1.7314036 5.9342256 5 -ATTTC 135 1.7314036 5.9342256 7 -CCTCT 190 1.7240983 8.397266 1 -ATCCA 145 1.7234317 5.49953 4 -GCAGC 185 1.7207267 6.9789357 95-96 -TCCTG 180 1.717774 13.2469 2 -CTCTG 180 1.717774 13.2469 2 -AAAAC 110 1.7125233 7.6389136 70-74 -CTTGG 170 1.7061908 9.2877 2 -AAAAT 95 1.7024158 8.291661 9 -TCACC 175 1.693972 8.957724 8 -TCCAC 175 1.693972 8.957724 5 -GAGAA 120 1.6828189 6.488669 6 -TCTCC 185 1.6787271 5.038359 55-59 -GAGCC 180 1.6742208 8.607355 9 -TCATC 150 1.6713123 5.1554413 2 -AGACA 125 1.6667906 6.169792 2 -TGATG 135 1.6636823 11.404236 9 -GGGAG 160 1.6460025 9.520067 1 -AGCCA 150 1.6289369 6.029673 10-14 -ATGCC 160 1.6288207 8.478622 45-49 -CTCGT 170 1.6223421 8.831266 3 -GAGGA 135 1.6215005 11.115086 3 -TGTTG 140 1.6173534 10.690706 2 -CTCAT 145 1.6156021 5.1554418 2 -CAGGT 150 1.6059413 9.907587 4 -GCTTG 160 1.6058266 9.2877 60-64 -GGGTC 175 1.6047363 12.728768 2 -TCATT 125 1.6031516 5.934226 9 -GTTGA 130 1.6020645 5.702118 1 -ACAGA 120 1.6001189 10.005068 95-96 -GGAGG 155 1.5945649 9.520067 2 -GGGGT 165 1.5912362 13.386638 1 -TGGGA 140 1.5763463 10.419649 2 -GGATG 140 1.5763462 15.629472 6 -GCCTC 190 1.575248 7.672287 2 -CCTGC 190 1.5752479 11.508429 2 -GCTCC 190 1.5752479 11.508429 6 -TCTCT 150 1.5667434 5.224736 95-96 -GGGAA 130 1.561445 11.115086 4 -TCCAT 140 1.5598917 10.3108835 8 -GGCTT 155 1.5556445 13.93155 1 -TTGAT 115 1.5511277 6.240928 4 -CATCA 130 1.5451456 5.49953 2 -AGAGA 110 1.542584 6.488669 9 -AGGAC 135 
1.541814 6.341309 55-59 -GTATG 125 1.5404466 9.123388 45-49 -AACAT 105 1.5324043 13.5055895 9 -AGCTC 150 1.5270194 9.4206915 5 -TTTGT 120 1.5172992 17.551357 8 -GATGA 115 1.5117996 6.082693 5 -GAGAT 115 1.5117996 6.082693 4 -AGGAT 115 1.5117996 12.165386 4 -TGAGA 115 1.5117996 6.082693 5 -CTGGT 150 1.5054625 9.2877 4 -GCTGT 150 1.5054625 18.5754 3 -TTCAC 135 1.504181 10.310883 7 -CCCAG 170 1.5035021 12.276537 2 -CAGTG 140 1.4988785 9.907587 5 -CTCCC 190 1.4978343 7.295242 1 -CCCTG 180 1.4923402 11.5084305 2 -CAGAG 130 1.4847097 7.398194 20-24 -CTTTG 135 1.4829465 10.165323 2 -CAAAA 95 1.4789973 7.203496 9 -TCTCA 130 1.4484707 5.1554413 8 -GAATG 110 1.4460692 12.165386 7 -GGAAT 110 1.4460692 12.165386 5 -TTTGG 125 1.4440656 5.345353 7 -GGCCT 165 1.4386805 12.103227 1 -GCTCT 150 1.4314783 6.1818867 20-24 -TCTGT 130 1.4280226 15.247986 3 -CTGTT 130 1.4280226 15.247986 4 -AGGTT 115 1.4172109 11.404235 8 -TTGAG 115 1.4172107 5.702117 4 -TTTGA 105 1.416247 7.4891143 10-14 -ATCTG 120 1.4061534 5.4218936 2 -GGTCT 140 1.4050984 9.287701 6 -TTTTA 95 1.4024467 7.384491 95-96 -GGGTG 145 1.3983592 13.386638 2 -GGCAC 150 1.3951839 8.607355 4 -AAAGA 85 1.3917071 7.5757985 8 -AAGAA 85 1.3917071 5.254889 75-79 -TTGTT 110 1.3908576 5.850453 4 -GGAGA 115 1.3812783 5.557543 3 -ATGAC 110 1.3750039 6.252721 95-96 -TGTTC 125 1.3730987 10.165325 5 -GGGCA 140 1.3694727 9.052216 4 -ATGAT 95 1.3668885 6.6574664 6 -CCACT 140 1.3551775 5.3746343 30-34 -TGGCT 135 1.3549163 13.931552 3 -GATGG 120 1.3511539 10.419648 9 -TCGTA 115 1.3475639 5.421894 40-44 -TGTCA 115 1.3475639 5.421894 5 -GCTGA 125 1.3382844 9.907587 6 -CAGAA 100 1.3334324 5.6025352 90-94 -CCAAA 105 1.3312978 5.8665853 8 -GGGCT 145 1.3296387 12.728768 1 -TAGGA 100 1.3146083 12.165386 4 -GACAG 115 1.313397 5.2844243 1 -GGTCC 150 1.3078917 8.068819 6 -CCATC 135 1.3067783 8.957724 9 -AAATG 85 1.3046323 7.101804 6 -TTCAA 95 1.2997144 6.330293 9 -CGTAT 110 1.2889742 8.675031 45-49 -TGACT 110 1.2889742 5.421894 3 -TATGC 110 1.2889739 
8.67503 45-49 -GCCCT 155 1.2850707 7.672287 3 -TGGGC 140 1.283789 8.485846 7 -ACTTT 100 1.2825212 5.9342256 1 -ATGTT 95 1.2813665 6.2409286 1 -ATTTG 95 1.2813663 12.481856 9 -TGGTT 110 1.2707777 5.345353 5 -TGGTG 120 1.2666163 9.767722 7 -GTTTT 100 1.2644161 5.8504534 6 -GCCTG 145 1.2642952 12.103229 1 -TTGCT 115 1.2632507 6.0991945 50-54 -CCACC 150 1.2614243 7.7821474 5 -GGACA 110 1.2562928 15.853274 6 -GAAGC 110 1.2562928 10.568849 9 -TGACA 100 1.2500036 5.7837667 9 -GACAT 100 1.2500035 11.567533 7 -TGGAA 95 1.248878 6.082693 5 -ACAGC 115 1.2488517 10.049455 5 -AATCC 105 1.2480024 5.499531 7 -TGCCT 130 1.2406145 8.831266 3 -AGGTG 110 1.2385577 5.209824 4 -GTGGC 135 1.2379395 12.728768 1 -CATGT 105 1.2303842 5.4218936 1 -TAGAT 85 1.2230055 6.0453725 90-94 -CCCTC 155 1.2219174 7.295242 4 -GCCGT 140 1.2206988 8.068819 3 -AGTTT 90 1.2139261 6.2409286 7 -TTTAG 90 1.213926 6.240928 8 -TTGGG 115 1.2138406 9.767722 2 -ACCTC 125 1.20998 8.957724 1 -AGCAA 90 1.2000892 6.169792 9 -CAAAG 90 1.2000891 6.169791 5 -AAAGC 90 1.2000891 6.169791 6 -ACAGG 105 1.1991886 10.568849 8 -AGGCA 105 1.1991886 5.712891 95-96 -ATCAG 95 1.1875033 5.7837663 6 -ATGAG 90 1.1831475 6.082693 25-29 -CAGTT 100 1.1717947 5.1698627 85-89 -ATGCT 100 1.1717947 5.421894 8 -TCAAT 85 1.1629024 6.3302937 10-14 -TGTGT 100 1.1552525 10.690706 3 -GCCCA 130 1.1497369 12.276536 1 -TGATT 85 1.1464858 12.481857 5 -TGCTC 120 1.1451827 8.831267 4 -TGTCC 120 1.1451827 13.2469015 2 -TCCCC 145 1.143084 7.295242 2 -AAGGC 100 1.1420842 5.493164 65-69 -CAACA 90 1.1411123 5.8665853 8 -CACAA 90 1.1411123 11.7331705 9 -ACATC 95 1.129145 5.4995303 8 -AAGCT 90 1.1250031 6.2527194 95-96 -GAAAG 80 1.1218792 12.977338 7 -AAGGA 80 1.1218792 6.488669 3 -GCACT 110 1.1198142 9.4206915 5 -CCTGA 110 1.119814 9.420691 9 -ACCTT 100 1.1142083 5.1554418 7 -GTCAT 95 1.113205 5.421894 1 -TGATC 95 1.113205 10.843788 5 -TCATG 95 1.113205 5.421894 3 -TGGAT 90 1.1091216 5.702118 9 -GTGGG 115 1.1090435 8.924425 1 -CTGTG 110 1.1040058 9.2877 4 
-GCTTT 100 1.0984789 5.4947696 95-96 -TGTCT 100 1.0984789 10.165323 5 -TTGGT 95 1.0974898 5.345353 4 -CTGTC 115 1.0974668 17.662535 4 -CAGAC 100 1.0859579 5.0247273 5 -GGAAC 95 1.0849801 5.2844243 6 -CCTCG 130 1.0778012 7.672287 6 -GCGGC 135 1.075477 7.372196 1 -ATAAA 60 1.0752101 8.291662 7 -GGGAT 95 1.0696635 10.419649 3 -CATCC 110 1.0647823 8.957723 3 -ACAGT 85 1.062503 5.7837663 4 -ACTGA 85 1.062503 11.567533 7 -GTTGG 100 1.0555136 9.767722 1 -TGTGG 100 1.0555136 9.767722 5 -GGAAA 75 1.0517617 19.466007 6 -GTGAA 80 1.0516868 6.082693 1 -GAAGT 80 1.0516866 6.082693 5 -GTCTC 110 1.0497508 8.831267 1 -CGGCT 120 1.046313 8.068818 1 -TTTAT 70 1.0333818 5.4645233 10-14 -GACAC 95 1.0316601 10.049455 7 -GGCAA 90 1.0278759 10.56885 3 -TCATA 75 1.0260904 6.330293 5 -ATTCA 75 1.0260903 6.3302927 7 -TAACA 70 1.0216029 6.7527957 8 -GGTCA 95 1.0170963 9.907589 3 -ATGGC 95 1.0170962 9.907587 1 -TCAGG 95 1.0170962 9.907587 8 -GGTGA 90 1.0133655 15.629474 3 -TGTTT 80 1.0115329 5.8504534 5 -TGAAT 70 1.007181 6.6574664 5 -ATTGA 70 1.0071809 6.6574664 7 -AAGTT 70 1.0071809 6.6574664 6 -TTGCC 105 1.0020349 8.831267 2 -CTTGC 105 1.0020349 8.831267 6 -GCAAA 75 1.0000744 6.169792 4 -CATAG 80 1.0000029 6.2527204 95-96 -GACTT 85 0.99602544 5.421894 1 -CTGAT 85 0.99602544 5.421894 4 -CTTGT 90 0.988631 10.165323 3 -AATGG 75 0.98595625 6.082693 8 -AAGGT 75 0.9859562 6.0826926 4 -GATGT 80 0.98588586 5.7021174 7 -GGATT 80 0.98588586 11.404235 5 -GGCGG 115 0.96349704 7.753219 1 -AGAGG 80 0.9608892 5.557543 8 -GAGGT 85 0.95706743 5.2098246 3 -ATGGG 85 0.9570673 5.209824 1 -CCGTC 115 0.95343953 7.672287 4 -TAGCA 75 0.9375027 5.7837667 1 -ACATG 75 0.9375026 5.7837663 2 -TTGCA 80 0.93743575 5.421894 4 -GTTCA 80 0.93743575 5.421894 6 -ATGTC 80 0.93743575 5.421894 5 -TTCAG 80 0.93743575 5.421894 8 -TTGAC 80 0.9374356 5.4218936 2 -GTTCT 85 0.93370706 5.0826616 1 -TTGTC 85 0.93370706 5.0826616 9 -TTTGC 85 0.93370706 5.0826616 3 -ATGGT 75 0.924268 5.7021174 4 -ATGAA 60 0.920917 7.1018047 9 -AGATG 70 
0.92022586 6.082693 5 -GCTCA 90 0.91621155 5.092265 95-96 -AGTGC 85 0.9100334 9.907587 2 -AGGGT 80 0.90076935 10.419649 1 -GTAGG 80 0.90076923 10.419648 6 -AGTGG 80 0.90076923 5.209824 2 -TAAAA 50 0.89600843 8.291662 8 -CACAT 75 0.89143026 5.499531 6 -CCATT 80 0.89136666 10.3108835 9 -ATACT 65 0.8892783 6.330293 9 -ACATT 65 0.88927823 6.3302927 7 -GCGGG 105 0.87971467 7.753219 2 -ACACC 85 0.8777014 9.555587 9 -CATAA 60 0.8756596 6.7527947 6 -ACCCT 90 0.8711856 13.436585 1 -GAACA 65 0.8667311 6.169792 7 -ACTGC 85 0.8653109 5.092265 95-96 -GGTAT 70 0.86265016 17.106354 6 -AGTTG 70 0.86265016 5.702118 7 -GAGAC 75 0.85656327 5.2844243 1 -GTGTC 85 0.8530954 13.93155 1 -GTTGC 85 0.8530954 9.2877 1 -ATAGA 55 0.84417385 7.1018047 8 -GAAAT 55 0.84417385 7.1018047 5 -CATTC 75 0.83565605 5.155441 6 -TCACA 70 0.83200157 5.499531 3 -TGCGG 90 0.8252928 8.485845 3 -GCATT 70 0.8202563 5.421894 4 -GAACC 75 0.8144686 5.0247283 6 -CTCGA 80 0.81441027 9.420691 6 -GAATC 65 0.8125023 5.7837667 6 -TACAG 65 0.81250226 11.567533 7 -TGGTA 65 0.80103225 11.404236 5 -AAGAC 60 0.80005944 6.169791 8 -CAAGG 70 0.7994591 5.2844243 2 -ATGTA 55 0.7913565 6.6574664 4 -AATGT 55 0.7913565 6.6574664 3 -CGGCA 85 0.7906042 8.607354 2 -GAGAG 65 0.7807225 5.557543 8 -ACCAT 65 0.7725729 5.499531 8 -TTCTA 60 0.7695128 5.934226 9 -TAGAA 50 0.7674308 7.1018047 9 -GCATC 75 0.7635097 9.4206915 1 -GTTCC 80 0.76345515 8.831267 6 -AGCTT 65 0.76166654 5.421894 1 -TTAGC 65 0.76166654 5.421894 9 -CTGTA 65 0.76166654 5.421894 2 -ACTTG 65 0.7616664 5.4218936 2 -GTGCT 75 0.7527313 9.287701 3 -ATCAT 55 0.7524662 6.3302927 3 -GTTTG 65 0.7509141 5.345353 9 -GTGTT 65 0.7509141 10.690706 1 -GTCAA 60 0.75000215 11.5675335 6 -AATGC 60 0.75000215 6.252721 95-96 -CAAGT 60 0.7500021 5.7837663 9 -GCAAT 60 0.7500021 5.7837663 4 -GCAAG 65 0.74235487 5.2844243 1 -AGTGT 60 0.7394144 5.7021174 1 -TTAGG 60 0.7394144 5.702118 7 -AGCGG 75 0.73364604 9.052214 1 -ATCCT 65 0.72423524 5.155441 4 -ACTCT 65 0.72423524 5.155441 9 -AGTGA 55 
0.7230346 6.082693 6 -AATAA 40 0.71680677 8.291662 6 -AACCT 60 0.71314424 5.4995303 1 -ATTCT 55 0.70538664 5.9342256 7 -AGTCT 60 0.7030768 5.421894 3 -GTGCA 65 0.69590795 9.907589 6 -AAAGT 45 0.69068766 7.101804 8 -AACTG 55 0.6875019 5.7837663 1 -CGAAG 60 0.68525064 5.2844243 4 -GATTG 55 0.67779654 5.702118 6 -GTGAT 55 0.67779654 11.404236 4 -TGTTA 50 0.67440337 12.481857 5 -TTGTA 50 0.6744033 6.240928 9 -TATTG 50 0.6744033 6.240928 7 -CTCTA 60 0.6685249 5.1554413 7 -TACCT 60 0.66852486 10.310882 8 -ATGGA 50 0.65730417 6.082693 8 -ATACA 45 0.6567447 6.7527957 6 -ATCAA 45 0.65674466 6.7527947 9 -TGTAA 45 0.6474735 6.6574664 7 -GCGGT 70 0.6418945 8.485846 4 -GGCCG 80 0.63731974 7.372196 2 -GGTTT 55 0.63538885 10.690706 9 -TTGTG 55 0.63538885 5.345353 1 -TATAT 40 0.62991583 7.2865515 8 -CCTGT 65 0.62030727 8.831266 3 -GTGAG 55 0.6192789 5.2098246 1 -TAGGG 55 0.61927885 5.209824 8 -GAGTT 50 0.6161787 5.7021174 6 -ATGTG 50 0.6161787 5.702118 2 -GAATA 40 0.61394465 7.1018047 6 -CTGCG 70 0.6103493 8.068818 2 -CGGTG 65 0.59604484 8.485845 2 -TAAGG 45 0.5915738 6.082693 9 -AAGTG 45 0.5915737 6.0826926 1 -TATTT 40 0.5905039 6.8306537 8 -GGCAT 55 0.5888452 14.861383 3 -GTATC 50 0.5858973 5.421894 4 -ATAAC 40 0.5837731 13.505591 7 -TTACT 45 0.57713455 5.934226 9 -GTATA 40 0.575532 13.314933 7 -GAGTG 50 0.5629808 5.209824 1 -GTACA 45 0.5625016 5.7837667 6 -ATAGC 45 0.5625016 5.7837667 9 -TCTAC 50 0.5571041 5.1554413 8 -GCGAG 55 0.53800714 9.052216 1 -ACGGG 55 0.5380071 9.052214 1 -GATAA 35 0.5372016 7.1018047 6 -AATAG 35 0.5372016 7.101805 7 -CAACT 45 0.53485817 5.4995303 6 -CATAC 45 0.53485817 5.4995303 5 -GATTC 45 0.52730757 5.421894 6 -AGGTA 40 0.5258433 12.165386 5 -CGGTC 60 0.52315664 8.068819 5 -ACGAG 45 0.51393795 5.2844243 7 -TATTC 40 0.5130085 5.9342256 7 -CTAAA 35 0.51080143 6.7527957 9 -TACAA 35 0.51080143 5.402236 35-39 -CCTTA 45 0.5013937 5.1554413 6 -CAGTA 40 0.50000143 5.7837667 4 -GTGTA 40 0.49294293 5.702118 4 -TAACT 35 0.47884214 6.330293 8 -CTTAA 35 
0.47884214 6.330293 7 -CTATA 35 0.47884214 6.330293 4 -TTAAC 35 0.47884214 6.330293 8 -TATCA 35 0.4788421 6.3302927 5 -TCAAC 40 0.47542948 5.499531 7 -ACTCA 40 0.47542942 5.49953 8 -TTAGT 35 0.47208238 10.120425 95-96 -TGTAT 35 0.47208238 6.2409286 3 -ATTGT 35 0.47208235 6.240928 8 -GTTAC 40 0.46871787 5.421894 6 -TGTAC 40 0.46871787 10.843788 7 -AGAGT 35 0.46011293 6.082693 5 -AGTAG 35 0.46011293 6.082693 5 -CTCCG 55 0.45599285 7.672287 6 -GGTAG 40 0.45038468 5.2098246 2 -TTTAC 35 0.44888243 5.9342256 8 -CTACT 40 0.44568333 5.1554418 4 -AACTA 30 0.4378298 6.7527947 9 -TATAG 30 0.43164897 6.6574664 5 -ATATA 25 0.4199739 7.7728767 9 -CTCAA 35 0.41600078 5.499531 9 -TATAC 30 0.4104361 6.3302927 5 -ACTAT 30 0.4104361 6.3302927 6 -TACTA 30 0.4104361 6.3302927 5 -TCGAT 35 0.41012815 10.843788 7 -ACGTT 35 0.41012815 5.421894 4 -CGAAA 30 0.40002972 6.169792 9 -GTAAG 30 0.3943825 6.082693 8 -ATAGG 30 0.3943825 6.082693 3 -TCCTA 35 0.38997287 5.1554413 5 -TTACC 35 0.38997287 5.1554413 7 -ACCGA 35 0.3800853 5.0247273 7 -GCATA 30 0.37500107 5.7837667 1 -TCGAA 30 0.37500107 5.7837667 4 -GCTAA 30 0.37500107 5.7837667 8 -TAGGT 30 0.3697072 5.7021174 7 -GTTAG 30 0.3697072 5.702118 6 -CAATA 25 0.36485815 6.7527947 5 -ATACC 30 0.35657212 5.499531 6 -GACGA 30 0.3426253 5.284424 6 -AAGCG 30 0.3426253 10.568848 7 -GTTTA 25 0.33720168 6.2409286 7 -GTATT 25 0.33720168 12.481857 6 -AGATA 20 0.30697232 7.1018047 5 -CGTCA 30 0.30540386 9.420691 5 -CCTAA 25 0.29714343 5.499531 7 -TACCA 25 0.2971434 5.49953 9 -TGCTA 25 0.29294866 5.421894 7 -TACGT 25 0.29294863 5.4218936 9 -AGACG 25 0.2855211 5.284425 9 -CCTAT 25 0.2785521 5.1554418 3 -TAAGC 20 0.25000072 5.7837667 9 -CTAAG 20 0.25000072 5.7837667 8 -CGATT 20 0.23435894 5.421894 9 -GGGTA 20 0.22519234 5.2098246 2 -ACGCA 20 0.21719159 5.0247273 5 -GCGAA 15 0.17131266 5.284425 3 -CGAAC 15 0.16289368 5.0247273 5 ->>END_MODULE diff --git a/public/mapping/.gitignore b/public/mapping/.gitignore deleted file mode 100644 index 
a6f89c2da7a029afa02b6e7a2bf80ad34958a311..0000000000000000000000000000000000000000 --- a/public/mapping/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/target/ \ No newline at end of file diff --git a/public/pom.xml b/public/pom.xml deleted file mode 100644 index 63bde77e79b63f61366f06a7d472a98b0cd9db0d..0000000000000000000000000000000000000000 --- a/public/pom.xml +++ /dev/null @@ -1,279 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!-- - - Biopet is built on top of GATK Queue for building bioinformatic - pipelines. It is mainly intended to support LUMC SHARK cluster which is running - SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - should also be able to execute Biopet tools and pipelines. - - Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - - Contact us at: sasc@lumc.nl - - A dual licensing mode is applied. The source code within this project that are - not part of GATK Queue is freely available for non-commercial use under an AGPL - license; For commercial users or users who do not want to follow the AGPL - license, please contact us to obtain a separate license. 
- ---> -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - <artifactId>Biopet</artifactId> - <groupId>nl.lumc.sasc</groupId> - <name>Biopet</name> - <packaging>pom</packaging> - <version>0.7.0-SNAPSHOT</version> - - <modules> - <module>biopet-public-package</module> - <module>bam2wig</module> - <module>bammetrics</module> - <module>basty</module> - <module>carp</module> - <module>flexiprep</module> - <module>gears</module> - <module>generate-indexes</module> - <module>gentrap</module> - <module>kopisu</module> - <module>mapping</module> - <module>sage</module> - <module>shiva</module> - <module>tinycap</module> - <module>toucan</module> - <module>biopet-core</module> - <module>biopet-utils</module> - <module>biopet-tools</module> - <module>biopet-tools-extensions</module> - <module>biopet-extensions</module> - <module>biopet-tools-package</module> - <module>gwas-test</module> - </modules> - - <properties> - <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> - <scoverage.plugin.version>1.1.1</scoverage.plugin.version> - <scalaVersion>2.10.4</scalaVersion> - <scoverage.aggregate>true</scoverage.aggregate> - </properties> - - <build> - <sourceDirectory>${basedir}/src/main/scala</sourceDirectory> - <testSourceDirectory>${basedir}/src/test/scala</testSourceDirectory> - <testResources> - <testResource> - <directory>${basedir}/src/test/resources</directory> - <includes> - <include>**/*</include> - </includes> - </testResource> - </testResources> - <resources> - <resource> - <directory>${basedir}/src/main/resources</directory> - <includes> - <include>**/*</include> - </includes> - </resource> - </resources> - <plugins> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-surefire-plugin</artifactId> - <version>2.18.1</version> - <configuration> 
- <forkCount>1C</forkCount> - <argLine>-Xmx300m</argLine> - <workingDirectory>${project.build.directory}</workingDirectory> - </configuration> - </plugin> - <plugin> - <artifactId>maven-dependency-plugin</artifactId> - <version>2.10</version> - <executions> - <execution> - <id>copy-installed</id> - <phase>prepare-package</phase> - <goals> - <goal>list</goal> - </goals> - <configuration> - <outputFile>${project.build.outputDirectory}/dependency_list.txt</outputFile> - </configuration> - </execution> - </executions> - </plugin> - <plugin> - <groupId>net.alchim31.maven</groupId> - <artifactId>scala-maven-plugin</artifactId> - <version>3.2.0</version> - <executions> - <execution> - <id>scala-compile</id> - <goals> - <goal>compile</goal> - <goal>testCompile</goal> - </goals> - <configuration> - <args> - <arg>-dependencyfile</arg> - <arg>${project.build.directory}/.scala_dependencies</arg> - <arg>-deprecation</arg> - <arg>-feature</arg> - </args> - </configuration> - </execution> - </executions> - <!-- ... (see other usage or goals for details) ... 
--> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-jar-plugin</artifactId> - <version>2.5</version> - <executions> - <execution> - <goals> - <goal>test-jar</goal> - </goals> - </execution> - </executions> - <configuration> - <archive> - <manifest> - <addDefaultImplementationEntries>true</addDefaultImplementationEntries> - <addDefaultSpecificationEntries>true</addDefaultSpecificationEntries> - </manifest> - </archive> - </configuration> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-compiler-plugin</artifactId> - <version>2.3.2</version> - <configuration> - <showDeprecation>true</showDeprecation> - </configuration> - </plugin> - <plugin> - <groupId>org.scalariform</groupId> - <artifactId>scalariform-maven-plugin</artifactId> - <version>0.1.4</version> - <executions> - <execution> - <phase>process-sources</phase> - <goals> - <goal>format</goal> - </goals> - <configuration> - <baseDir>${basedir}/src</baseDir> - <rewriteArrowSymbols>false</rewriteArrowSymbols> - <alignParameters>true</alignParameters> - <alignSingleLineCaseStatements_maxArrowIndent>40 - </alignSingleLineCaseStatements_maxArrowIndent> - <alignSingleLineCaseStatements>true</alignSingleLineCaseStatements> - <compactStringConcatenation>false</compactStringConcatenation> - <compactControlReadability>false</compactControlReadability> - <doubleIndentClassDeclaration>false</doubleIndentClassDeclaration> - <formatXml>true</formatXml> - <indentLocalDefs>false</indentLocalDefs> - <indentPackageBlocks>true</indentPackageBlocks> - <indentSpaces>2</indentSpaces> - <placeScaladocAsterisksBeneathSecondAsterisk>false - </placeScaladocAsterisksBeneathSecondAsterisk> - <preserveDanglingCloseParenthesis>true</preserveDanglingCloseParenthesis> - <preserveSpaceBeforeArguments>false</preserveSpaceBeforeArguments> - <rewriteArrowSymbols>false</rewriteArrowSymbols> - <spaceBeforeColon>false</spaceBeforeColon> - 
<spaceInsideBrackets>false</spaceInsideBrackets> - <spaceInsideParentheses>false</spaceInsideParentheses> - <spacesWithinPatternBinders>true</spacesWithinPatternBinders> - </configuration> - </execution> - </executions> - </plugin> - <plugin> - <groupId>pl.project13.maven</groupId> - <artifactId>git-commit-id-plugin</artifactId> - <version>2.1.10</version> - <executions> - <execution> - <goals> - <goal>revision</goal> - </goals> - </execution> - </executions> - <configuration> - <prefix>git</prefix> - <dateFormat>dd.MM.yyyy '@' HH:mm:ss z</dateFormat> - <verbose>false</verbose> - <dotGitDirectory>${basedir}/../../.git</dotGitDirectory> - <useNativeGit>true</useNativeGit> - <skipPoms>false</skipPoms> - <generateGitPropertiesFile>true</generateGitPropertiesFile> - <generateGitPropertiesFilename>src/main/resources/git.properties</generateGitPropertiesFilename> - <failOnNoGitDirectory>false</failOnNoGitDirectory> - <abbrevLength>8</abbrevLength> - <skip>false</skip> - <gitDescribe> - <skip>false</skip> - <always>false</always> - <abbrev>8</abbrev> - <dirty>-dirty</dirty> - <forceLongFormat>false</forceLongFormat> - </gitDescribe> - </configuration> - </plugin> - <plugin> - <groupId>com.mycila</groupId> - <artifactId>license-maven-plugin</artifactId> - <version>2.6</version> - <configuration> - <excludes> - <exclude>**/*git*</exclude> - <exclude>**/License*</exclude> - <exclude>**/*.bam</exclude> - <exclude>**/*.bai</exclude> - <exclude>**/*.gtf</exclude> - <exclude>**/*.fq</exclude> - <exclude>**/*.sam</exclude> - <exclude>**/*.bed</exclude> - <exclude>**/*.refFlat</exclude> - <exclude>**/*.R</exclude> - <exclude>**/*.rscript</exclude> - </excludes> - </configuration> - </plugin> - <plugin> - <groupId>org.scoverage</groupId> - <artifactId>scoverage-maven-plugin</artifactId> - <version>${scoverage.plugin.version}</version> - <configuration> - <scalaVersion>${scalaVersion}</scalaVersion> - <aggregate>true</aggregate> - <highlighting>true</highlighting> - 
<aggregate>true</aggregate> - <!-- other parameters --> - </configuration> - </plugin> - </plugins> - </build> - <reporting> - <plugins> - <plugin> - <groupId>org.scoverage</groupId> - <artifactId>scoverage-maven-plugin</artifactId> - <version>${scoverage.plugin.version}</version> - <configuration> - <aggregate>true</aggregate> <!-- for aggregated report --> - <highlighting>true</highlighting> - </configuration> - <reportSets> - <reportSet> - <reports> - <report>report</report> <!-- select only one report from: report, integration-report and report-only reporters --> - </reports> - </reportSet> - </reportSets> - </plugin> - </plugins> - </reporting> -</project> diff --git a/public/sage/.gitignore b/public/sage/.gitignore deleted file mode 100644 index a6f89c2da7a029afa02b6e7a2bf80ad34958a311..0000000000000000000000000000000000000000 --- a/public/sage/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/target/ \ No newline at end of file diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala deleted file mode 100644 index a7d04155b164df95ffad2753ff6a8395f57520c5..0000000000000000000000000000000000000000 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala +++ /dev/null @@ -1,32 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. 
The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -package nl.lumc.sasc.biopet.pipelines.shiva - -import nl.lumc.sasc.biopet.core.PipelineCommand -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.queue.QScript - -/** - * Implementation shiva without GATK parts - * - * Created by pjvan_thof on 2/24/15. - */ -class Shiva(val root: Configurable) extends QScript with ShivaTrait { - def this() = this(null) -} - -/** This object give a default main method to the pipelines */ -object Shiva extends PipelineCommand \ No newline at end of file diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala deleted file mode 100644 index d075619c1c39264adf9fba823b043110000b1668..0000000000000000000000000000000000000000 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala +++ /dev/null @@ -1,31 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. 
- */ -package nl.lumc.sasc.biopet.pipelines.shiva - -import nl.lumc.sasc.biopet.core.PipelineCommand -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.queue.QScript - -/** - * Implementation of ShivaVariantcalling without GATK variantcallers - * - * Created by pjvan_thof on 2/26/15. - */ -class ShivaVariantcalling(val root: Configurable) extends QScript with ShivaVariantcallingTrait { - def this() = this(null) -} - -object ShivaVariantcalling extends PipelineCommand \ No newline at end of file diff --git a/public/shiva/src/test/resources/ref.dict b/public/shiva/src/test/resources/ref.dict deleted file mode 100644 index ab5ceac32357f56f2b7c4ddbfdb17197187c9da8..0000000000000000000000000000000000000000 --- a/public/shiva/src/test/resources/ref.dict +++ /dev/null @@ -1,2 +0,0 @@ -@HD VN:1.4 SO:unsorted -@SQ SN:chr1 LN:9 UR:file:/home/pjvan_thof/pipelines/biopet/public/mapping/src/test/resources/ref.fa M5:fe15dbbd0900310caf32827f6da57550 diff --git a/public/shiva/src/test/resources/ref.fa b/public/shiva/src/test/resources/ref.fa deleted file mode 100644 index 0c51751ffe1eee84552290f10f8cdb6c84625131..0000000000000000000000000000000000000000 --- a/public/shiva/src/test/resources/ref.fa +++ /dev/null @@ -1,2 +0,0 @@ ->chr1 -AGTAGTAGT diff --git a/public/shiva/src/test/resources/ref.fa.fai b/public/shiva/src/test/resources/ref.fa.fai deleted file mode 100644 index 17ecf802afe8cf33c2e35f72e604bb039114d77c..0000000000000000000000000000000000000000 --- a/public/shiva/src/test/resources/ref.fa.fai +++ /dev/null @@ -1 +0,0 @@ -chr1 9 6 9 10 diff --git a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala b/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala deleted file mode 100644 index c2249f15511d241428ef6f7d8f39b4bbc41e1143..0000000000000000000000000000000000000000 --- a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala +++ /dev/null @@ -1,163 +0,0 @@ 
-/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -package nl.lumc.sasc.biopet.pipelines.shiva - -import java.io.{ File, FileOutputStream } - -import com.google.common.io.Files -import nl.lumc.sasc.biopet.utils.config.Config -import nl.lumc.sasc.biopet.extensions.bwa.BwaMem -import nl.lumc.sasc.biopet.extensions.picard.{ MarkDuplicates, SortSam } -import nl.lumc.sasc.biopet.extensions.tools.VcfStats -import nl.lumc.sasc.biopet.utils.ConfigUtils -import org.broadinstitute.gatk.queue.QSettings -import org.scalatest.Matchers -import org.scalatest.testng.TestNGSuite -import org.testng.annotations.{ DataProvider, Test } - -/** - * Test class for [[Shiva]] - * - * Created by pjvan_thof on 3/2/15. 
- */ -class ShivaTest extends TestNGSuite with Matchers { - def initPipeline(map: Map[String, Any]): Shiva = { - new Shiva() { - override def configNamespace = "shiva" - override def globalConfig = new Config(ConfigUtils.mergeMaps(map, ShivaTest.config)) - qSettings = new QSettings - qSettings.runName = "test" - } - } - - @DataProvider(name = "shivaOptions") - def shivaOptions = { - val bool = Array(true, false) - - for (s1 <- bool; s2 <- bool; s3 <- bool; multi <- bool; single <- bool; library <- bool) - yield Array("", s1, s2, s3, multi, single, library) - } - - @Test(dataProvider = "shivaOptions") - def testShiva(f: String, sample1: Boolean, sample2: Boolean, sample3: Boolean, - multi: Boolean, single: Boolean, library: Boolean): Unit = { - val map = { - var m: Map[String, Any] = ShivaTest.config - if (sample1) m = ConfigUtils.mergeMaps(ShivaTest.sample1, m) - if (sample2) m = ConfigUtils.mergeMaps(ShivaTest.sample2, m) - if (sample3) m = ConfigUtils.mergeMaps(ShivaTest.sample3, m) - ConfigUtils.mergeMaps(Map("multisample_variantcalling" -> multi, - "single_sample_variantcalling" -> single, - "library_variantcalling" -> library), m) - } - - if (!sample1 && !sample2 && !sample3) { // When no samples - intercept[IllegalArgumentException] { - initPipeline(map).script() - } - } else { - val pipeline = initPipeline(map) - pipeline.script() - - val numberLibs = (if (sample1) 1 else 0) + (if (sample2) 1 else 0) + (if (sample3) 2 else 0) - val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0) + (if (sample3) 1 else 0) - - pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + (if (sample3) 1 else 0)) - - pipeline.functions.count(_.isInstanceOf[VcfStats]) shouldBe (if (multi) 2 else 0) + - (if (single) numberSamples * 2 else 0) + (if (library) numberLibs * 2 else 0) - } - } -} - -object ShivaTest { - val outputDir = Files.createTempDir() - new File(outputDir, "input").mkdirs() - def inputTouch(name: String): String = { - val file 
= new File(outputDir, "input" + File.separator + name) - Files.touch(file) - file.getAbsolutePath - } - - private def copyFile(name: String): Unit = { - val is = getClass.getResourceAsStream("/" + name) - val os = new FileOutputStream(new File(outputDir, name)) - org.apache.commons.io.IOUtils.copy(is, os) - os.close() - } - - copyFile("ref.fa") - copyFile("ref.dict") - copyFile("ref.fa.fai") - - val config = Map( - "name_prefix" -> "test", - "output_dir" -> outputDir, - "cache" -> true, - "dir" -> "test", - "vep_script" -> "test", - "reference_fasta" -> (outputDir + File.separator + "ref.fa"), - "gatk_jar" -> "test", - "samtools" -> Map("exe" -> "test"), - "bcftools" -> Map("exe" -> "test"), - "fastqc" -> Map("exe" -> "test"), - "input_alleles" -> "test", - "variantcallers" -> "raw", - "fastqc" -> Map("exe" -> "test"), - "seqtk" -> Map("exe" -> "test"), - "sickle" -> Map("exe" -> "test"), - "cutadapt" -> Map("exe" -> "test"), - "bwa" -> Map("exe" -> "test"), - "samtools" -> Map("exe" -> "test"), - "macs2" -> Map("exe" -> "test"), - "igvtools" -> Map("exe" -> "test"), - "wigtobigwig" -> Map("exe" -> "test"), - "md5sum" -> Map("exe" -> "test"), - "bgzip" -> Map("exe" -> "test"), - "tabix" -> Map("exe" -> "test") - ) - - val sample1 = Map( - "samples" -> Map("sample1" -> Map("libraries" -> Map( - "lib1" -> Map( - "R1" -> inputTouch("1_1_R1.fq"), - "R2" -> inputTouch("1_1_R2.fq") - ) - ) - ))) - - val sample2 = Map( - "samples" -> Map("sample2" -> Map("libraries" -> Map( - "lib1" -> Map( - "R1" -> inputTouch("2_1_R1.fq"), - "R2" -> inputTouch("2_1_R2.fq") - ) - ) - ))) - - val sample3 = Map( - "samples" -> Map("sample3" -> Map("libraries" -> Map( - "lib1" -> Map( - "R1" -> inputTouch("3_1_R1.fq"), - "R2" -> inputTouch("3_1_R2.fq") - ), - "lib2" -> Map( - "R1" -> inputTouch("3_2_R1.fq"), - "R2" -> inputTouch("3_2_R2.fq") - ) - ) - ))) -} \ No newline at end of file diff --git a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala 
b/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala deleted file mode 100644 index d86e46c7e9ef21dfcdd5a67f779e46adc2b23483..0000000000000000000000000000000000000000 --- a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala +++ /dev/null @@ -1,143 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -package nl.lumc.sasc.biopet.pipelines.shiva - -import java.io.{ File, FileOutputStream } - -import com.google.common.io.Files -import nl.lumc.sasc.biopet.core.BiopetPipe -import nl.lumc.sasc.biopet.extensions.Freebayes -import nl.lumc.sasc.biopet.extensions.bcftools.{ BcftoolsCall, BcftoolsMerge } -import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants -import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter } -import nl.lumc.sasc.biopet.utils.ConfigUtils -import nl.lumc.sasc.biopet.utils.config.Config -import org.broadinstitute.gatk.queue.QSettings -import org.scalatest.Matchers -import org.scalatest.testng.TestNGSuite -import org.testng.annotations.{ DataProvider, Test } - -import scala.collection.mutable.ListBuffer - -/** - * Test class for [[ShivaVariantcalling]] - * - * Created by pjvan_thof on 3/2/15. 
- */ -class ShivaVariantcallingTest extends TestNGSuite with Matchers { - def initPipeline(map: Map[String, Any]): ShivaVariantcalling = { - new ShivaVariantcalling { - override def configNamespace = "shivavariantcalling" - override def globalConfig = new Config(ConfigUtils.mergeMaps(map, ShivaVariantcallingTest.config)) - qSettings = new QSettings - qSettings.runName = "test" - } - } - - @DataProvider(name = "shivaVariantcallingOptions") - def shivaVariantcallingOptions = { - val bool = Array(true, false) - (for ( - bams <- 0 to 3; - raw <- bool; - bcftools <- bool; - bcftoolsSinglesample <- bool; - freebayes <- bool; - varscanCnsSinglesample <- bool - ) yield Array(bams, raw, bcftools, bcftoolsSinglesample, freebayes, varscanCnsSinglesample)).toArray - } - - @Test(dataProvider = "shivaVariantcallingOptions") - def testShivaVariantcalling(bams: Int, - raw: Boolean, - bcftools: Boolean, - bcftoolsSinglesample: Boolean, - freebayes: Boolean, - varscanCnsSinglesample: Boolean) = { - val callers: ListBuffer[String] = ListBuffer() - if (raw) callers.append("raw") - if (bcftools) callers.append("bcftools") - if (bcftoolsSinglesample) callers.append("bcftools_singlesample") - if (freebayes) callers.append("freebayes") - if (varscanCnsSinglesample) callers.append("varscan_cns_singlesample") - val map = Map("variantcallers" -> callers.toList) - val pipeline = initPipeline(map) - - pipeline.inputBams = (for (n <- 1 to bams) yield n.toString -> ShivaVariantcallingTest.inputTouch("bam_" + n + ".bam")).toMap - - val illegalArgumentException = pipeline.inputBams.isEmpty || (!raw && !bcftools && !bcftoolsSinglesample && !freebayes && !varscanCnsSinglesample) - - if (illegalArgumentException) intercept[IllegalArgumentException] { - pipeline.init() - pipeline.script() - } - - if (!illegalArgumentException) { - pipeline.init() - pipeline.script() - - val pipesJobs = pipeline.functions.filter(_.isInstanceOf[BiopetPipe]).flatMap(_.asInstanceOf[BiopetPipe].pipesJobs) - - 
pipeline.functions.count(_.isInstanceOf[CombineVariants]) shouldBe (1 + (if (raw) 1 else 0) + (if (varscanCnsSinglesample) 1 else 0)) - pipesJobs.count(_.isInstanceOf[BcftoolsCall]) shouldBe (if (bcftools) 1 else 0) + (if (bcftoolsSinglesample) bams else 0) - pipeline.functions.count(_.isInstanceOf[BcftoolsMerge]) shouldBe (if (bcftoolsSinglesample && bams > 1) 1 else 0) - pipesJobs.count(_.isInstanceOf[Freebayes]) shouldBe (if (freebayes) 1 else 0) - pipesJobs.count(_.isInstanceOf[MpileupToVcf]) shouldBe (if (raw) bams else 0) - pipeline.functions.count(_.isInstanceOf[VcfFilter]) shouldBe (if (raw) bams else 0) - } - } -} - -object ShivaVariantcallingTest { - val outputDir = Files.createTempDir() - outputDir.deleteOnExit() - new File(outputDir, "input").mkdirs() - private def inputTouch(name: String): File = { - val file = new File(outputDir, "input" + File.separator + name).getAbsoluteFile - Files.touch(file) - file - } - - private def copyFile(name: String): Unit = { - val is = getClass.getResourceAsStream("/" + name) - val os = new FileOutputStream(new File(outputDir, name)) - org.apache.commons.io.IOUtils.copy(is, os) - os.close() - } - - copyFile("ref.fa") - copyFile("ref.dict") - copyFile("ref.fa.fai") - - val config = Map( - "name_prefix" -> "test", - "output_dir" -> outputDir, - "cache" -> true, - "dir" -> "test", - "vep_script" -> "test", - "reference_fasta" -> (outputDir + File.separator + "ref.fa"), - "gatk_jar" -> "test", - "samtools" -> Map("exe" -> "test"), - "bcftools" -> Map("exe" -> "test"), - "freebayes" -> Map("exe" -> "test"), - "md5sum" -> Map("exe" -> "test"), - "bgzip" -> Map("exe" -> "test"), - "tabix" -> Map("exe" -> "test"), - "rscript" -> Map("exe" -> "test"), - "exe" -> "test", - "varscan_jar" -> "test" - ) -} \ No newline at end of file diff --git a/public/toucan/.gitignore b/public/toucan/.gitignore deleted file mode 100644 index a6f89c2da7a029afa02b6e7a2bf80ad34958a311..0000000000000000000000000000000000000000 --- 
a/public/toucan/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/target/ \ No newline at end of file diff --git a/public/gentrap/.gitignore b/sage/.gitignore similarity index 100% rename from public/gentrap/.gitignore rename to sage/.gitignore diff --git a/public/sage/pom.xml b/sage/pom.xml similarity index 100% rename from public/sage/pom.xml rename to sage/pom.xml diff --git a/public/sage/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala b/sage/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala similarity index 100% rename from public/sage/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala rename to sage/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala diff --git a/public/sage/src/test/resources/log4j.properties b/sage/src/test/resources/log4j.properties similarity index 100% rename from public/sage/src/test/resources/log4j.properties rename to sage/src/test/resources/log4j.properties diff --git a/public/shiva/.gitignore b/shiva/.gitignore similarity index 100% rename from public/shiva/.gitignore rename to shiva/.gitignore diff --git a/public/shiva/pom.xml b/shiva/pom.xml similarity index 100% rename from public/shiva/pom.xml rename to shiva/pom.xml diff --git a/public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/outputVcfFiles.ssp b/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/outputVcfFiles.ssp similarity index 100% rename from public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/outputVcfFiles.ssp rename to shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/outputVcfFiles.ssp diff --git a/public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp b/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp similarity index 100% rename from public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp rename to shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp diff 
--git a/public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/shivaFront.ssp b/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/shivaFront.ssp similarity index 100% rename from public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/shivaFront.ssp rename to shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/shivaFront.ssp diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala similarity index 55% rename from public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala rename to shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala index 76aee99b4f3b1c857edc4f402aaae1133f313a55..ed0e1318d96c615346172b5e1add0df6dc4476d0 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala @@ -15,11 +15,13 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva -import nl.lumc.sasc.biopet.core.Reference +import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference } import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension +import nl.lumc.sasc.biopet.extensions.gatk._ import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait import nl.lumc.sasc.biopet.pipelines.toucan.Toucan +import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript /** @@ -27,7 +29,9 @@ import org.broadinstitute.gatk.queue.QScript * * Created by pjvan_thof on 2/26/15. 
*/ -trait ShivaTrait extends MultisampleMappingTrait with Reference with TargetRegions { qscript: QScript => +class Shiva(val root: Configurable) extends QScript with MultisampleMappingTrait with Reference with TargetRegions { qscript => + + def this() = this(null) override def reportClass: Option[ReportBuilderExtension] = { val shiva = new ShivaReport(this) @@ -36,8 +40,14 @@ trait ShivaTrait extends MultisampleMappingTrait with Reference with TargetRegio Some(shiva) } + override def defaults = Map( + "haplotypecaller" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0), + "genotypegvcfs" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0), + "unifiedgenotyper" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0) + ) + /** Method to make the variantcalling namespace of shiva */ - def makeVariantcalling(multisample: Boolean = false): ShivaVariantcallingTrait with QScript = { + def makeVariantcalling(multisample: Boolean = false): ShivaVariantcalling with QScript = { if (multisample) new ShivaVariantcalling(qscript) { override def namePrefix = "multisample" override def configNamespace: String = "shivavariantcalling" @@ -57,12 +67,32 @@ trait ShivaTrait extends MultisampleMappingTrait with Reference with TargetRegio override def makeLibrary(id: String) = new this.Library(id) /** Sample specific settings */ - override def summarySettings = Map("single_sample_variantcalling" -> variantcalling.isDefined) + override def summarySettings = super.summarySettings ++ + Map("single_sample_variantcalling" -> variantcalling.isDefined, "use_indel_realigner" -> useIndelRealigner) /** Class to generate jobs for a library */ class Library(libId: String) extends super.Library(libId) { + + lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true) + lazy val useBaseRecalibration: Boolean = { + val c: Boolean = config("use_base_recalibration", default = true) + val br = new BaseRecalibrator(qscript) + if (c && br.knownSites.isEmpty) + 
logger.warn("No Known site found, skipping base recalibration, file: " + inputBam) + c && br.knownSites.nonEmpty + } + + override def preProcessBam = if (useIndelRealigner && useBaseRecalibration) + bamFile.map(swapExt(libDir, _, ".bam", ".realign.baserecal.bam")) + else if (useIndelRealigner) bamFile.map(swapExt(libDir, _, ".bam", ".realign.bam")) + else if (useBaseRecalibration) bamFile.map(swapExt(libDir, _, ".bam", ".baserecal.bam")) + else bamFile + /** Library specific settings */ - override def summarySettings = Map("library_variantcalling" -> variantcalling.isDefined) + override def summarySettings = Map( + "library_variantcalling" -> variantcalling.isDefined, + "use_indel_realigner" -> useIndelRealigner, + "use_base_recalibration" -> useBaseRecalibration) lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean && (bamFile.isDefined || preProcessBam.isDefined)) { @@ -73,6 +103,15 @@ trait ShivaTrait extends MultisampleMappingTrait with Reference with TargetRegio override def addJobs() = { super.addJobs() + if (useIndelRealigner && useBaseRecalibration) { + val file = addIndelRealign(bamFile.get, libDir, isIntermediate = true) + addBaseRecalibrator(file, libDir, libraries.size > 1) + } else if (useIndelRealigner) { + addIndelRealign(bamFile.get, libDir, libraries.size > 1) + } else if (useBaseRecalibration) { + addBaseRecalibrator(bamFile.get, libDir, libraries.size > 1) + } + variantcalling.foreach(vc => { vc.sampleId = Some(sampleId) vc.libId = Some(libId) @@ -88,10 +127,22 @@ trait ShivaTrait extends MultisampleMappingTrait with Reference with TargetRegio Some(makeVariantcalling(multisample = false)) } else None + override def keepMergedFiles: Boolean = config("keep_merged_files", default = !useIndelRealigner) + + lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true) + + override def preProcessBam = if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) { + 
bamFile.map(swapExt(sampleDir, _, ".bam", ".realign.bam")) + } else bamFile + /** This will add sample jobs */ override def addJobs(): Unit = { super.addJobs() + if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) { + addIndelRealign(bamFile.get, sampleDir, false) + } + preProcessBam.foreach { bam => variantcalling.foreach(vc => { vc.sampleId = Some(sampleId) @@ -150,4 +201,43 @@ trait ShivaTrait extends MultisampleMappingTrait with Reference with TargetRegio "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")), "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed")) ) + + /** Adds indel realignment jobs */ + def addIndelRealign(inputBam: File, dir: File, isIntermediate: Boolean): File = { + val realignerTargetCreator = RealignerTargetCreator(this, inputBam, dir) + realignerTargetCreator.isIntermediate = true + add(realignerTargetCreator) + + val indelRealigner = IndelRealigner(this, inputBam, realignerTargetCreator.out, dir) + indelRealigner.isIntermediate = isIntermediate + add(indelRealigner) + + indelRealigner.out + } + + /** Adds base recalibration jobs */ + def addBaseRecalibrator(inputBam: File, dir: File, isIntermediate: Boolean): File = { + val baseRecalibrator = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal")) + + if (baseRecalibrator.knownSites.isEmpty) return inputBam + add(baseRecalibrator) + + if (config("use_analyze_covariates", default = true).asBoolean) { + val baseRecalibratorAfter = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.after")) + baseRecalibratorAfter.BQSR = Some(baseRecalibrator.out) + add(baseRecalibratorAfter) + + add(AnalyzeCovariates(this, baseRecalibrator.out, baseRecalibratorAfter.out, swapExt(dir, inputBam, ".bam", ".baserecal.pdf"))) + } + + val printReads = PrintReads(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.bam")) + printReads.BQSR = Some(baseRecalibrator.out) + printReads.isIntermediate = 
isIntermediate + add(printReads) + + printReads.out + } } + +/** This object give a default main method to the pipelines */ +object Shiva extends PipelineCommand \ No newline at end of file diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala similarity index 98% rename from public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala rename to shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala index 890257549c5ac09a8ae5254d85ec8ed6fcb1fb48..de344fe61c994c67935ccdaee1fdc366c150da70 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala @@ -44,9 +44,6 @@ object ShivaReport extends MultisampleMappingReportTrait { override def pipelineName = "shiva" - override def extFiles = super.extFiles ++ List("js/gears.js") - .map(x => ExtFile("/nl/lumc/sasc/biopet/pipelines/gears/report/ext/" + x, x)) - override def additionalSections = super.additionalSections ++ (if (variantcallingExecuted) List("Variantcalling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp", Map("showPlot" -> true, "showTable" -> false))) else Nil) @@ -120,6 +117,7 @@ object ShivaReport extends MultisampleMappingReportTrait { /** * Generate a stackbar plot for found variants + * * @param outputDir OutputDir for the tsv and png file * @param prefix Prefix of the tsv and png file * @param summary Summary class diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala similarity index 100% rename from public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala rename to shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala diff --git 
a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala similarity index 80% rename from public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala rename to shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala index 06f876979fb3ae541be76758d031174cb6c93e8b..c38cb8564ddb0f9243443bab60f6cb41ceec5b53 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala @@ -15,27 +15,32 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva +import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, SampleLibraryTag } import nl.lumc.sasc.biopet.core.summary.SummaryQScript -import nl.lumc.sasc.biopet.core.{ Reference, SampleLibraryTag } import nl.lumc.sasc.biopet.extensions.Tabix import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, GenotypeConcordance } import nl.lumc.sasc.biopet.extensions.tools.VcfStats import nl.lumc.sasc.biopet.extensions.vt.{ VtDecompose, VtNormalize } import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions -import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers._ +import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.{ VarscanCnsSingleSample, _ } import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging } +import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile /** - * Common trait for ShivaVariantcalling + * Implementation of ShivaVariantcalling * * Created by pjvan_thof on 2/26/15. 
*/ -trait ShivaVariantcallingTrait extends SummaryQScript +class ShivaVariantcalling(val root: Configurable) extends QScript + with SummaryQScript with SampleLibraryTag with Reference with TargetRegions { - qscript: QScript => + qscript => + + def this() = this(null) @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true) protected var inputBamsArg: List[File] = Nil @@ -82,10 +87,10 @@ trait ShivaVariantcallingTrait extends SummaryQScript require(callers.nonEmpty, "must select at least 1 variantcaller, choices are: " + callersList.map(_.name).mkString(", ")) val cv = new CombineVariants(qscript) - cv.outputFile = finalFile - cv.setKey = "VariantCaller" - cv.genotypeMergeOptions = Some("PRIORITIZE") - cv.rodPriorityList = callers.map(_.name).mkString(",") + cv.out = finalFile + cv.setKey = Some("VariantCaller") + cv.genotypemergeoption = Some("PRIORITIZE") + cv.rod_priority_list = Some(callers.map(_.name).mkString(",")) for (caller <- callers) { caller.inputBams = inputBams caller.namePrefix = namePrefix @@ -106,17 +111,17 @@ trait ShivaVariantcallingTrait extends SummaryQScript vtDecompose.inputVcf = vtNormalize.outputVcf vtDecompose.outputVcf = swapExt(caller.outputDir, vtNormalize.outputVcf, ".vcf.gz", ".decompose.vcf.gz") add(vtDecompose, Tabix(this, vtDecompose.outputVcf)) - cv.addInput(vtDecompose.outputVcf, caller.name) + cv.variant :+= TaggedFile(vtDecompose.outputVcf, caller.name) } else if (normalize && !decompose) { vtNormalize.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz") add(vtNormalize, Tabix(this, vtNormalize.outputVcf)) - cv.addInput(vtNormalize.outputVcf, caller.name) + cv.variant :+= TaggedFile(vtNormalize.outputVcf, caller.name) } else if (!normalize && decompose) { vtDecompose.inputVcf = caller.outputFile vtDecompose.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".decompose.vcf.gz") add(vtDecompose, Tabix(this, vtDecompose.outputVcf)) - 
cv.addInput(vtDecompose.outputVcf, caller.name) - } else cv.addInput(caller.outputFile, caller.name) + cv.variant :+= TaggedFile(vtDecompose.outputVcf, caller.name) + } else cv.variant :+= TaggedFile(caller.outputFile, caller.name) } add(cv) @@ -135,9 +140,9 @@ trait ShivaVariantcallingTrait extends SummaryQScript referenceVcf.foreach(referenceVcfFile => { val gc = new GenotypeConcordance(this) - gc.evalFile = vcfFile - gc.compFile = referenceVcfFile - gc.outputFile = new File(vcfFile.getParentFile, s"$namePrefix-genotype_concordance.$name.txt") + gc.eval = vcfFile + gc.comp = referenceVcfFile + gc.out = new File(vcfFile.getParentFile, s"$namePrefix-genotype_concordance.$name.txt") referenceVcfRegions.foreach(gc.intervals ::= _) add(gc) addSummarizable(gc, s"$namePrefix-genotype_concordance-$name") @@ -156,12 +161,17 @@ trait ShivaVariantcallingTrait extends SummaryQScript } /** Will generate all available variantcallers */ - protected def callersList: List[Variantcaller] = List( - new Freebayes(this), - new RawVcf(this), - new Bcftools(this), - new BcftoolsSingleSample(this), - new VarscanCnsSingleSample(this)) + protected def callersList: List[Variantcaller] = + new HaplotypeCallerGvcf(this) :: + new HaplotypeCallerAllele(this) :: + new UnifiedGenotyperAllele(this) :: + new UnifiedGenotyper(this) :: + new HaplotypeCaller(this) :: + new Freebayes(this) :: + new RawVcf(this) :: + new Bcftools(this) :: + new BcftoolsSingleSample(this) :: + new VarscanCnsSingleSample(this) :: Nil /** Location of summary file */ def summaryFile = new File(outputDir, "ShivaVariantcalling.summary.json") @@ -177,4 +187,6 @@ trait ShivaVariantcallingTrait extends SummaryQScript def summaryFiles: Map[String, File] = { callers.map(x => x.name -> x.outputFile).toMap + ("final" -> finalFile) } -} \ No newline at end of file +} + +object ShivaVariantcalling extends PipelineCommand \ No newline at end of file diff --git 
a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Breakdancer.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Breakdancer.scala similarity index 100% rename from public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Breakdancer.scala rename to shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Breakdancer.scala diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Clever.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Clever.scala similarity index 100% rename from public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Clever.scala rename to shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Clever.scala diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala similarity index 88% rename from public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala rename to shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala index 2e5613055d95f5911bcc0eec453b2b5678380d6c..91f8468b189b878d756554782b956a8a0037ceef 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala @@ -41,7 +41,7 @@ class Delly(val root: Configurable) extends SvCaller { delly.analysistype = "DEL" delly.outputvcf = new File(dellyDir, sample + ".delly.del.vcf") add(delly) - catVariants.inputFiles :+= delly.outputvcf + catVariants.variant :+= delly.outputvcf } if (dup) { val delly = new DellyCaller(this) @@ -49,7 +49,7 @@ class Delly(val root: Configurable) extends SvCaller { delly.analysistype = "DUP" delly.outputvcf = new File(dellyDir, sample + ".delly.dup.vcf") add(delly) - catVariants.inputFiles :+= delly.outputvcf + 
catVariants.variant :+= delly.outputvcf } if (inv) { val delly = new DellyCaller(this) @@ -57,18 +57,18 @@ class Delly(val root: Configurable) extends SvCaller { delly.analysistype = "INV" delly.outputvcf = new File(dellyDir, sample + ".delly.inv.vcf") add(delly) - catVariants.inputFiles :+= delly.outputvcf + catVariants.variant :+= delly.outputvcf } if (tra) { val delly = new DellyCaller(this) delly.input = bamFile delly.analysistype = "TRA" delly.outputvcf = new File(dellyDir, sample + ".delly.tra.vcf") - catVariants.inputFiles :+= delly.outputvcf + catVariants.variant :+= delly.outputvcf add(delly) } - require(catVariants.inputFiles.nonEmpty, "At least 1 SV-type must be selected for Delly [DEL/DUP/INV/TRA]") + require(catVariants.variant.nonEmpty, "Must atleast 1 SV-type be selected for Delly") add(catVariants) addVCF(sample, catVariants.outputFile) diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala similarity index 100% rename from public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala rename to shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/SvCaller.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/SvCaller.scala similarity index 100% rename from public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/SvCaller.scala rename to shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/SvCaller.scala diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Bcftools.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Bcftools.scala similarity index 100% rename from public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Bcftools.scala rename to 
shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Bcftools.scala diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/BcftoolsSingleSample.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/BcftoolsSingleSample.scala similarity index 100% rename from public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/BcftoolsSingleSample.scala rename to shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/BcftoolsSingleSample.scala diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Freebayes.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Freebayes.scala similarity index 100% rename from public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Freebayes.scala rename to shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Freebayes.scala diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCaller.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCaller.scala similarity index 67% rename from protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCaller.scala rename to shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCaller.scala index be299d9a9ca43567ba7f0f81123ed29cc4e2031e..1224592eb7fb66eb4075eb9aff9215379d6553c4 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCaller.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCaller.scala @@ -3,11 +3,10 @@ * LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions * on how to use this protected part of biopet or contact us at sasc@lumc.nl */ -package nl.lumc.sasc.biopet.pipelines.gatk.variantcallers +package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers -import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.Variantcaller +import nl.lumc.sasc.biopet.extensions.gatk import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.extensions.gatk.broad /** Default mode for the haplotypecaller */ class HaplotypeCaller(val root: Configurable) extends Variantcaller { @@ -15,7 +14,7 @@ class HaplotypeCaller(val root: Configurable) extends Variantcaller { protected def defaultPrio = 1 def biopetScript() { - val hc = broad.HaplotypeCaller(this, inputBams.values.toList, outputFile) + val hc = gatk.HaplotypeCaller(this, inputBams.values.toList, outputFile) add(hc) } } diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerAllele.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerAllele.scala similarity index 60% rename from protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerAllele.scala rename to shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerAllele.scala index d743b4a528bf0803ba305287a993c758bc8d1d8a..09e7b5e0286fee0da538c23ce9d8b5f639df1555 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerAllele.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerAllele.scala @@ -3,11 +3,10 @@ * LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions * on how to use this protected part of biopet or contact us at sasc@lumc.nl */ -package nl.lumc.sasc.biopet.pipelines.gatk.variantcallers +package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers -import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.Variantcaller +import nl.lumc.sasc.biopet.extensions.gatk import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.extensions.gatk.broad /** Allele mode for Haplotypecaller */ class HaplotypeCallerAllele(val root: Configurable) extends Variantcaller { @@ -15,9 +14,9 @@ class HaplotypeCallerAllele(val root: Configurable) extends Variantcaller { protected def defaultPrio = 5 def biopetScript() { - val hc = broad.HaplotypeCaller(this, inputBams.values.toList, outputFile) + val hc = gatk.HaplotypeCaller(this, inputBams.values.toList, outputFile) hc.alleles = config("input_alleles") - hc.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES + hc.genotyping_mode = Some("GENOTYPE_GIVEN_ALLELES") add(hc) } } diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerGvcf.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala similarity index 64% rename from protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerGvcf.scala rename to shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala index 1491135cc566d954baa3793335f3fb6f27c3c645..2f7b8446b4c0a64b1348edb396f43fedcaaab4d3 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerGvcf.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala @@ -3,11 +3,10 @@ * LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions * on how to use this protected part of biopet or contact us at sasc@lumc.nl */ -package nl.lumc.sasc.biopet.pipelines.gatk.variantcallers +package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers -import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.Variantcaller +import nl.lumc.sasc.biopet.extensions.gatk import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.extensions.gatk.broad /** Gvcf mode for haplotypecaller */ class HaplotypeCallerGvcf(val root: Configurable) extends Variantcaller { @@ -16,12 +15,12 @@ class HaplotypeCallerGvcf(val root: Configurable) extends Variantcaller { def biopetScript() { val gvcfFiles = for ((sample, inputBam) <- inputBams) yield { - val hc = broad.HaplotypeCaller.gvcf(this, inputBam, new File(outputDir, sample + ".gvcf.vcf.gz")) + val hc = gatk.HaplotypeCaller.gvcf(this, inputBam, new File(outputDir, sample + ".gvcf.vcf.gz")) add(hc) hc.out } - val genotypeGVCFs = broad.GenotypeGVCFs(this, gvcfFiles.toList, outputFile) + val genotypeGVCFs = gatk.GenotypeGVCFs(this, gvcfFiles.toList, outputFile) add(genotypeGVCFs) } } diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala similarity index 92% rename from public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala rename to shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala index 847e671166191da3153cc2df818828c66de37aa1..ec46b9c348b3761786195777057852a96f6b214f 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala @@ -15,11 +15,9 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers -import java.io.File - import 
nl.lumc.sasc.biopet.extensions.gatk.CombineVariants import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup -import nl.lumc.sasc.biopet.extensions.tools.{ VcfFilter, MpileupToVcf } +import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter } import nl.lumc.sasc.biopet.utils.config.Configurable /** Makes a vcf file from a mpileup without statistics */ @@ -60,9 +58,9 @@ class RawVcf(val root: Configurable) extends Variantcaller { } val cv = new CombineVariants(this) - cv.inputFiles = rawFiles.toList - cv.outputFile = outputFile - cv.setKey = "null" + cv.variant = rawFiles.toList + cv.out = outputFile + cv.setKey = Some("null") cv.excludeNonVariants = !keepRefCalls add(cv) } diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyper.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyper.scala similarity index 67% rename from protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyper.scala rename to shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyper.scala index 5cf58b69df5c7d03e4c2a8d8621c623deb9a7937..43fbe730d4b585edacff62b7a8388a5c82fbe062 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyper.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyper.scala @@ -3,11 +3,10 @@ * LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions * on how to use this protected part of biopet or contact us at sasc@lumc.nl */ -package nl.lumc.sasc.biopet.pipelines.gatk.variantcallers +package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers -import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.Variantcaller +import nl.lumc.sasc.biopet.extensions.gatk import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.extensions.gatk.broad /** Default mode for UnifiedGenotyper */ class UnifiedGenotyper(val root: Configurable) extends Variantcaller { @@ -15,7 +14,7 @@ class UnifiedGenotyper(val root: Configurable) extends Variantcaller { protected def defaultPrio = 20 def biopetScript() { - val ug = broad.UnifiedGenotyper(this, inputBams.values.toList, outputFile) + val ug = gatk.UnifiedGenotyper(this, inputBams.values.toList, outputFile) add(ug) } } diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyperAllele.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyperAllele.scala similarity index 60% rename from protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyperAllele.scala rename to shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyperAllele.scala index 43278defac34fbd5e3298c29587de51147b82f0e..364691f517c7434a39dc498a58ac1349e7e46d2f 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyperAllele.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyperAllele.scala @@ -3,11 +3,10 @@ * LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions * on how to use this protected part of biopet or contact us at sasc@lumc.nl */ -package nl.lumc.sasc.biopet.pipelines.gatk.variantcallers +package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers -import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.Variantcaller +import nl.lumc.sasc.biopet.extensions.gatk import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.extensions.gatk.broad /** Allele mode for GenotyperAllele */ class UnifiedGenotyperAllele(val root: Configurable) extends Variantcaller { @@ -15,9 +14,9 @@ class UnifiedGenotyperAllele(val root: Configurable) extends Variantcaller { protected def defaultPrio = 9 def biopetScript() { - val ug = broad.UnifiedGenotyper(this, inputBams.values.toList, outputFile) + val ug = gatk.UnifiedGenotyper(this, inputBams.values.toList, outputFile) ug.alleles = config("input_alleles") - ug.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES + ug.genotyping_mode = Some("GENOTYPE_GIVEN_ALLELES") add(ug) } } diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala similarity index 100% rename from public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala rename to shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/VarscanCnsSingleSample.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/VarscanCnsSingleSample.scala similarity index 92% rename from public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/VarscanCnsSingleSample.scala rename to 
shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/VarscanCnsSingleSample.scala index 9a0fb2839413948de68d3d16101fc4ce912df5b3..cb213f28e6485c04c7bf8f76a7293cf062516d8a 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/VarscanCnsSingleSample.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/VarscanCnsSingleSample.scala @@ -35,7 +35,8 @@ class VarscanCnsSingleSample(val root: Configurable) extends Variantcaller { "disable_baq" -> true, "depth" -> 1000000 ), - "varscanmpileup2cns" -> Map("strand_filter" -> 0) + "varscanmpileup2cns" -> Map("strand_filter" -> 0), + "combinevariants" -> Map("scattercount" -> 20) ) override def fixedValues = Map( @@ -67,9 +68,9 @@ class VarscanCnsSingleSample(val root: Configurable) extends Variantcaller { } val cv = new CombineVariants(this) - cv.inputFiles = sampleVcfs - cv.outputFile = outputFile - cv.setKey = "null" + cv.variant = sampleVcfs + cv.out = outputFile + cv.setKey = Some("null") cv.excludeNonVariants = true add(cv) } diff --git a/public/shiva/src/test/resources/log4j.properties b/shiva/src/test/resources/log4j.properties similarity index 100% rename from public/shiva/src/test/resources/log4j.properties rename to shiva/src/test/resources/log4j.properties diff --git a/public/shiva/src/test/resources/paired01.bam b/shiva/src/test/resources/paired01.bam similarity index 100% rename from public/shiva/src/test/resources/paired01.bam rename to shiva/src/test/resources/paired01.bam diff --git a/public/shiva/src/test/resources/paired01.bam.bai b/shiva/src/test/resources/paired01.bam.bai similarity index 100% rename from public/shiva/src/test/resources/paired01.bam.bai rename to shiva/src/test/resources/paired01.bam.bai diff --git a/public/mapping/src/test/resources/ref.dict b/shiva/src/test/resources/ref.dict similarity index 100% rename from public/mapping/src/test/resources/ref.dict rename to shiva/src/test/resources/ref.dict diff 
--git a/public/mapping/src/test/resources/ref.fa b/shiva/src/test/resources/ref.fa similarity index 100% rename from public/mapping/src/test/resources/ref.fa rename to shiva/src/test/resources/ref.fa diff --git a/public/mapping/src/test/resources/ref.fa.fai b/shiva/src/test/resources/ref.fa.fai similarity index 100% rename from public/mapping/src/test/resources/ref.fa.fai rename to shiva/src/test/resources/ref.fa.fai diff --git a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCallingTest.scala b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCallingTest.scala similarity index 98% rename from public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCallingTest.scala rename to shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCallingTest.scala index b0eaaa4303b9725ce88c2856d3d050060196140f..c8e163cf81fd4173f99ba9ac4b47e8dd4e6cccac 100644 --- a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCallingTest.scala +++ b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCallingTest.scala @@ -23,7 +23,7 @@ import nl.lumc.sasc.biopet.extensions.breakdancer.{ BreakdancerCaller, Breakdanc import nl.lumc.sasc.biopet.extensions.clever.CleverCaller import nl.lumc.sasc.biopet.extensions.delly.DellyCaller import nl.lumc.sasc.biopet.extensions.pindel.{ PindelCaller, PindelConfig, PindelVCF } -import nl.lumc.sasc.biopet.utils.ConfigUtils +import nl.lumc.sasc.biopet.utils.{ ConfigUtils, Logging } import nl.lumc.sasc.biopet.utils.config.Config import org.broadinstitute.gatk.queue.QSettings import org.scalatest.Matchers @@ -152,9 +152,9 @@ class ShivaSvCallingTest extends TestNGSuite with Matchers { pipeline.inputBams = Map("bam" -> ShivaSvCallingTest.inputTouch("bam" + ".bam")) intercept[IllegalArgumentException] { - pipeline.init() pipeline.script() } + Logging.errors.clear() } private def resourcePath(p: String): String = { diff --git 
a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaTest.scala b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala similarity index 61% rename from protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaTest.scala rename to shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala index 8be9ae9a79066c233c1c92b6dfb2f9cb1432385f..eb1d40ece24ce6862023eae3343b84c840ea3e42 100644 --- a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaTest.scala +++ b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala @@ -1,19 +1,28 @@ /** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. 
*/ -package nl.lumc.sasc.biopet.pipelines.gatk +package nl.lumc.sasc.biopet.pipelines.shiva import java.io.{ File, FileOutputStream } import com.google.common.io.Files -import nl.lumc.sasc.biopet.utils.config.Config -import nl.lumc.sasc.biopet.extensions.bwa.BwaMem -import nl.lumc.sasc.biopet.extensions.gatk.broad._ -import nl.lumc.sasc.biopet.extensions.picard.{ MarkDuplicates, SortSam } +import nl.lumc.sasc.biopet.extensions.gatk.{ BaseRecalibrator, IndelRealigner, PrintReads, RealignerTargetCreator } +import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates import nl.lumc.sasc.biopet.extensions.tools.VcfStats import nl.lumc.sasc.biopet.utils.ConfigUtils +import nl.lumc.sasc.biopet.utils.config.Config import org.broadinstitute.gatk.queue.QSettings import org.scalatest.Matchers import org.scalatest.testng.TestNGSuite @@ -24,7 +33,7 @@ import org.testng.annotations.{ DataProvider, Test } * * Created by pjvan_thof on 3/2/15. */ -class ShivaTest extends TestNGSuite with Matchers { +trait ShivaTestTrait extends TestNGSuite with Matchers { def initPipeline(map: Map[String, Any]): Shiva = { new Shiva() { override def configNamespace = "shiva" @@ -36,24 +45,33 @@ class ShivaTest extends TestNGSuite with Matchers { @DataProvider(name = "shivaOptions") def shivaOptions = { - val bool = Array(true, false) - for ( - s1 <- bool; s2 <- bool; multi <- bool; - dbsnp <- bool; realign <- bool; baseRecalibration <- bool - ) yield Array("", s1, s2, multi, dbsnp, realign, baseRecalibration) + s1 <- sample1; s2 <- sample2; + realign <- realignProvider; baseRecalibration <- baseRecalibrationProvider + ) yield Array("", s1, s2, realign, baseRecalibration) } + def sample1 = Array(false, true) + def sample2 = Array(false, true) + def realignProvider = Array(false, true) + def baseRecalibrationProvider = Array(false, true) + def multisampleCalling: Boolean = true + def sampleCalling = false + def libraryCalling = false + def dbsnp: Boolean = true + @Test(dataProvider = "shivaOptions") 
def testShiva(f: String, sample1: Boolean, sample2: Boolean, - multi: Boolean, dbsnp: Boolean, realign: Boolean, baseRecalibration: Boolean): Unit = { val map = { var m: Map[String, Any] = ShivaTest.config if (sample1) m = ConfigUtils.mergeMaps(ShivaTest.sample1, m) if (sample2) m = ConfigUtils.mergeMaps(ShivaTest.sample2, m) - if (dbsnp) m = ConfigUtils.mergeMaps(Map("dbsnp" -> "test"), m) - ConfigUtils.mergeMaps(Map("multisample_variantcalling" -> multi, + if (dbsnp) m = ConfigUtils.mergeMaps(Map("dbsnp" -> "test.vcf.gz"), m) + ConfigUtils.mergeMaps(Map( + "multisample_variantcalling" -> multisampleCalling, + "single_sample_variantcalling" -> sampleCalling, + "library_variantcalling" -> libraryCalling, "use_indel_realigner" -> realign, "use_base_recalibration" -> baseRecalibration), m) @@ -75,14 +93,39 @@ class ShivaTest extends TestNGSuite with Matchers { // Gatk preprocess pipeline.functions.count(_.isInstanceOf[IndelRealigner]) shouldBe (numberLibs * (if (realign) 1 else 0) + (if (sample2 && realign) 1 else 0)) pipeline.functions.count(_.isInstanceOf[RealignerTargetCreator]) shouldBe (numberLibs * (if (realign) 1 else 0) + (if (sample2 && realign) 1 else 0)) - pipeline.functions.count(_.isInstanceOf[BaseRecalibrator]) shouldBe (if (dbsnp && baseRecalibration) numberLibs else 0) + pipeline.functions.count(_.isInstanceOf[BaseRecalibrator]) shouldBe (if (dbsnp && baseRecalibration) (numberLibs * 2) else 0) pipeline.functions.count(_.isInstanceOf[PrintReads]) shouldBe (if (dbsnp && baseRecalibration) numberLibs else 0) - pipeline.functions.count(_.isInstanceOf[VcfStats]) shouldBe (if (multi) 2 else 0) + pipeline.functions.count(_.isInstanceOf[VcfStats]) shouldBe ( + (if (multisampleCalling) 2 else 0) + + (if (sampleCalling) numberSamples * 2 else 0) + + (if (libraryCalling) numberLibs * 2 else 0)) } } } +class ShivaDefaultTest extends ShivaTestTrait +class ShivaNoDbsnpTest extends ShivaTestTrait { + override def sample1 = Array(true) + override def sample2 = 
Array(false) + override def realignProvider = Array(true) + override def dbsnp = false +} +class ShivaLibraryCallingTest extends ShivaTestTrait { + override def sample1 = Array(true, false) + override def sample2 = Array(false, true) + override def realignProvider = Array(false) + override def baseRecalibrationProvider = Array(false) + override def libraryCalling = true +} +class ShivaSampleCallingTest extends ShivaTestTrait { + override def sample1 = Array(true, false) + override def sample2 = Array(false, true) + override def realignProvider = Array(false) + override def baseRecalibrationProvider = Array(false) + override def sampleCalling = true +} + object ShivaTest { val outputDir = Files.createTempDir() new File(outputDir, "input").mkdirs() diff --git a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..8c9dcb1e5496d3e43792bcc83d56644396eaeee6 --- /dev/null +++ b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala @@ -0,0 +1,194 @@ +/** + * Due to the license issue with GATK, this part of Biopet can only be used inside the + * LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions + * on how to use this protected part of biopet or contact us at sasc@lumc.nl + */ +package nl.lumc.sasc.biopet.pipelines.shiva + +import java.io.{ File, FileOutputStream } + +import com.google.common.io.Files +import nl.lumc.sasc.biopet.core.BiopetPipe +import nl.lumc.sasc.biopet.extensions.Freebayes +import nl.lumc.sasc.biopet.extensions.bcftools.{ BcftoolsCall, BcftoolsMerge } +import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, HaplotypeCaller, UnifiedGenotyper } +import nl.lumc.sasc.biopet.utils.config.Config +import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter, VcfStats } +import nl.lumc.sasc.biopet.utils.ConfigUtils +import org.broadinstitute.gatk.queue.QSettings +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.{ DataProvider, Test } + +import scala.collection.mutable.ListBuffer + +/** + * Class for testing ShivaVariantcalling + * + * Created by pjvan_thof on 3/2/15. 
+ */ +trait ShivaVariantcallingTestTrait extends TestNGSuite with Matchers { + def initPipeline(map: Map[String, Any]): ShivaVariantcalling = { + new ShivaVariantcalling() { + override def configNamespace = "shivavariantcalling" + override def globalConfig = new Config(ConfigUtils.mergeMaps(map, ShivaVariantcallingTest.config)) + qSettings = new QSettings + qSettings.runName = "test" + } + } + + def raw: Boolean = false + def bcftools: Boolean = false + def bcftools_singlesample: Boolean = false + def haplotypeCallerGvcf: Boolean = false + def haplotypeCallerAllele: Boolean = false + def unifiedGenotyperAllele: Boolean = false + def unifiedGenotyper: Boolean = false + def haplotypeCaller: Boolean = false + def freebayes: Boolean = false + def varscanCnsSinglesample: Boolean = false + + @DataProvider(name = "shivaVariantcallingOptions") + def shivaVariantcallingOptions = { + (for (bams <- 0 to 2) yield Array[Any](bams, raw, bcftools, bcftools_singlesample, unifiedGenotyper, + haplotypeCaller, haplotypeCallerGvcf, haplotypeCallerAllele, unifiedGenotyperAllele, + freebayes, varscanCnsSinglesample) + ).toArray + } + + @Test(dataProvider = "shivaVariantcallingOptions") + def testShivaVariantcalling(bams: Int, + raw: Boolean, + bcftools: Boolean, + bcftoolsSinglesample: Boolean, + unifiedGenotyper: Boolean, + haplotypeCaller: Boolean, + haplotypeCallerGvcf: Boolean, + haplotypeCallerAllele: Boolean, + unifiedGenotyperAllele: Boolean, + freebayes: Boolean, + varscanCnsSinglesample: Boolean) = { + val callers: ListBuffer[String] = ListBuffer() + if (raw) callers.append("raw") + if (bcftools) callers.append("bcftools") + if (bcftoolsSinglesample) callers.append("bcftools_singlesample") + if (unifiedGenotyper) callers.append("unifiedgenotyper") + if (haplotypeCallerGvcf) callers.append("haplotypecaller_gvcf") + if (haplotypeCallerAllele) callers.append("haplotypecaller_allele") + if (unifiedGenotyperAllele) callers.append("unifiedgenotyper_allele") + if (haplotypeCaller) 
callers.append("haplotypecaller") + if (freebayes) callers.append("freebayes") + if (varscanCnsSinglesample) callers.append("varscan_cns_singlesample") + val map = Map("variantcallers" -> callers.toList) + val pipeline = initPipeline(map) + + pipeline.inputBams = (for (n <- 1 to bams) yield n.toString -> ShivaVariantcallingTest.inputTouch("bam_" + n + ".bam")).toMap + + val illegalArgumentException = pipeline.inputBams.isEmpty || callers.isEmpty + + if (illegalArgumentException) intercept[IllegalArgumentException] { + pipeline.script() + } + + if (!illegalArgumentException) { + pipeline.script() + + val pipesJobs = pipeline.functions.filter(_.isInstanceOf[BiopetPipe]).flatMap(_.asInstanceOf[BiopetPipe].pipesJobs) + + pipeline.functions.count(_.isInstanceOf[CombineVariants]) shouldBe (1 + (if (raw) 1 else 0) + (if (varscanCnsSinglesample) 1 else 0)) + pipesJobs.count(_.isInstanceOf[BcftoolsCall]) shouldBe (if (bcftools) 1 else 0) + (if (bcftoolsSinglesample) bams else 0) + pipeline.functions.count(_.isInstanceOf[BcftoolsMerge]) shouldBe (if (bcftoolsSinglesample && bams > 1) 1 else 0) + pipesJobs.count(_.isInstanceOf[Freebayes]) shouldBe (if (freebayes) 1 else 0) + pipesJobs.count(_.isInstanceOf[MpileupToVcf]) shouldBe (if (raw) bams else 0) + pipeline.functions.count(_.isInstanceOf[VcfFilter]) shouldBe (if (raw) bams else 0) + pipeline.functions.count(_.isInstanceOf[HaplotypeCaller]) shouldBe (if (haplotypeCaller) 1 else 0) + + (if (haplotypeCallerAllele) 1 else 0) + (if (haplotypeCallerGvcf) bams else 0) + pipeline.functions.count(_.isInstanceOf[UnifiedGenotyper]) shouldBe (if (unifiedGenotyper) 1 else 0) + + (if (unifiedGenotyperAllele) 1 else 0) + pipeline.functions.count(_.isInstanceOf[VcfStats]) shouldBe (1 + callers.size) + } + } +} + +class ShivaVariantcallingNoVariantcallersTest extends ShivaVariantcallingTestTrait +class ShivaVariantcallingAllTest extends ShivaVariantcallingTestTrait { + override def raw: Boolean = true + override def bcftools: Boolean = 
true + override def bcftools_singlesample: Boolean = true + override def haplotypeCallerGvcf: Boolean = true + override def haplotypeCallerAllele: Boolean = true + override def unifiedGenotyperAllele: Boolean = true + override def unifiedGenotyper: Boolean = true + override def haplotypeCaller: Boolean = true + override def freebayes: Boolean = true + override def varscanCnsSinglesample: Boolean = true +} +class ShivaVariantcallingRawTest extends ShivaVariantcallingTestTrait { + override def raw: Boolean = true +} +class ShivaVariantcallingBcftoolsTest extends ShivaVariantcallingTestTrait { + override def bcftools: Boolean = true +} +class ShivaVariantcallingBcftoolsSinglesampleTest extends ShivaVariantcallingTestTrait { + override def bcftools_singlesample: Boolean = true +} +class ShivaVariantcallingHaplotypeCallerGvcfTest extends ShivaVariantcallingTestTrait { + override def haplotypeCallerGvcf: Boolean = true +} +class ShivaVariantcallingHaplotypeCallerAlleleTest extends ShivaVariantcallingTestTrait { + override def haplotypeCallerAllele: Boolean = true +} +class ShivaVariantcallingUnifiedGenotyperAlleleTest extends ShivaVariantcallingTestTrait { + override def unifiedGenotyperAllele: Boolean = true +} +class ShivaVariantcallingUnifiedGenotyperTest extends ShivaVariantcallingTestTrait { + override def unifiedGenotyper: Boolean = true +} +class ShivaVariantcallingHaplotypeCallerTest extends ShivaVariantcallingTestTrait { + override def haplotypeCaller: Boolean = true +} +class ShivaVariantcallingFreebayesTest extends ShivaVariantcallingTestTrait { + override def freebayes: Boolean = true +} +class ShivaVariantcallingVarscanCnsSinglesampleTest extends ShivaVariantcallingTestTrait { + override def varscanCnsSinglesample: Boolean = true +} + +object ShivaVariantcallingTest { + val outputDir = Files.createTempDir() + outputDir.deleteOnExit() + new File(outputDir, "input").mkdirs() + def inputTouch(name: String): File = { + val file = new File(outputDir, "input" + 
File.separator + name).getAbsoluteFile + Files.touch(file) + file + } + + private def copyFile(name: String): Unit = { + val is = getClass.getResourceAsStream("/" + name) + val os = new FileOutputStream(new File(outputDir, name)) + org.apache.commons.io.IOUtils.copy(is, os) + os.close() + } + + copyFile("ref.fa") + copyFile("ref.dict") + copyFile("ref.fa.fai") + + val config = Map( + "name_prefix" -> "test", + "output_dir" -> outputDir, + "cache" -> true, + "dir" -> "test", + "vep_script" -> "test", + "reference_fasta" -> (outputDir + File.separator + "ref.fa"), + "gatk_jar" -> "test", + "samtools" -> Map("exe" -> "test"), + "bcftools" -> Map("exe" -> "test"), + "md5sum" -> Map("exe" -> "test"), + "bgzip" -> Map("exe" -> "test"), + "tabix" -> Map("exe" -> "test"), + "input_alleles" -> "test.vcf.gz", + "varscan_jar" -> "test" + ) +} \ No newline at end of file diff --git a/public/src/src/test/resources/log4j.properties b/src/src/test/resources/log4j.properties similarity index 100% rename from public/src/src/test/resources/log4j.properties rename to src/src/test/resources/log4j.properties diff --git a/public/tinycap/.gitignore b/tinycap/.gitignore similarity index 100% rename from public/tinycap/.gitignore rename to tinycap/.gitignore diff --git a/public/tinycap/pom.xml b/tinycap/pom.xml similarity index 100% rename from public/tinycap/pom.xml rename to tinycap/pom.xml diff --git a/public/tinycap/src/main/resources/nl/lumc/sasc/biopet/pipelines/tinycap/tinycapFront.ssp b/tinycap/src/main/resources/nl/lumc/sasc/biopet/pipelines/tinycap/tinycapFront.ssp similarity index 100% rename from public/tinycap/src/main/resources/nl/lumc/sasc/biopet/pipelines/tinycap/tinycapFront.ssp rename to tinycap/src/main/resources/nl/lumc/sasc/biopet/pipelines/tinycap/tinycapFront.ssp diff --git a/public/tinycap/src/main/scala/nl/lumc/sasc/biopet/pipelines/tinycap/TinyCap.scala b/tinycap/src/main/scala/nl/lumc/sasc/biopet/pipelines/tinycap/TinyCap.scala similarity index 100% rename from 
public/tinycap/src/main/scala/nl/lumc/sasc/biopet/pipelines/tinycap/TinyCap.scala rename to tinycap/src/main/scala/nl/lumc/sasc/biopet/pipelines/tinycap/TinyCap.scala diff --git a/public/tinycap/src/main/scala/nl/lumc/sasc/biopet/pipelines/tinycap/TinyCapReport.scala b/tinycap/src/main/scala/nl/lumc/sasc/biopet/pipelines/tinycap/TinyCapReport.scala similarity index 100% rename from public/tinycap/src/main/scala/nl/lumc/sasc/biopet/pipelines/tinycap/TinyCapReport.scala rename to tinycap/src/main/scala/nl/lumc/sasc/biopet/pipelines/tinycap/TinyCapReport.scala diff --git a/public/tinycap/src/main/scala/nl/lumc/sasc/biopet/pipelines/tinycap/measures/FragmentsPerSmallRna.scala b/tinycap/src/main/scala/nl/lumc/sasc/biopet/pipelines/tinycap/measures/FragmentsPerSmallRna.scala similarity index 100% rename from public/tinycap/src/main/scala/nl/lumc/sasc/biopet/pipelines/tinycap/measures/FragmentsPerSmallRna.scala rename to tinycap/src/main/scala/nl/lumc/sasc/biopet/pipelines/tinycap/measures/FragmentsPerSmallRna.scala diff --git a/public/tinycap/src/test/resources/log4j.properties b/tinycap/src/test/resources/log4j.properties similarity index 100% rename from public/tinycap/src/test/resources/log4j.properties rename to tinycap/src/test/resources/log4j.properties diff --git a/public/tinycap/src/test/scala/nl/lumc/sasc/biopet/pipelines/tinycap/TinyCapTest.scala b/tinycap/src/test/scala/nl/lumc/sasc/biopet/pipelines/tinycap/TinyCapTest.scala similarity index 100% rename from public/tinycap/src/test/scala/nl/lumc/sasc/biopet/pipelines/tinycap/TinyCapTest.scala rename to tinycap/src/test/scala/nl/lumc/sasc/biopet/pipelines/tinycap/TinyCapTest.scala diff --git a/public/kopisu/.gitignore b/toucan/.gitignore similarity index 100% rename from public/kopisu/.gitignore rename to toucan/.gitignore diff --git a/public/toucan/pom.xml b/toucan/pom.xml similarity index 81% rename from public/toucan/pom.xml rename to toucan/pom.xml index 
781e458c31cc8128843b55873781e3aaa9f8b1e0..62a9699f8f63680d1c8b6bce67a7390907ca8b34 100644 --- a/public/toucan/pom.xml +++ b/toucan/pom.xml @@ -43,5 +43,17 @@ <artifactId>BiopetToolsExtensions</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.testng</groupId> + <artifactId>testng</artifactId> + <version>6.8</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.scalatest</groupId> + <artifactId>scalatest_2.10</artifactId> + <version>2.2.1</version> + <scope>test</scope> + </dependency> </dependencies> </project> diff --git a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweActivateAfterAnnotImport.scala b/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweActivateAfterAnnotImport.scala similarity index 100% rename from public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweActivateAfterAnnotImport.scala rename to toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweActivateAfterAnnotImport.scala diff --git a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweDownloadAfterAnnotate.scala b/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweDownloadAfterAnnotate.scala similarity index 100% rename from public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweDownloadAfterAnnotate.scala rename to toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweDownloadAfterAnnotate.scala diff --git a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala b/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala similarity index 84% rename from public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala rename to toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala index 2e3ffe26a1735dbea0f664bf8c4c957bd7262f7b..58dcaf82447daef751eae0d21ac7c43387026149 100644 --- 
a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala +++ b/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala @@ -40,15 +40,17 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum @Input(doc = "Input GVCF file", shortName = "gvcf", required = false) var inputGvcf: Option[File] = None - var sampleIds: List[String] = Nil + var outputVcf: Option[File] = None + + def sampleInfo: Map[String, Map[String, Any]] = root match { + case m: MultiSampleQScript => m.samples.map { case (sampleId, sample) => sampleId -> sample.sampleTags } + case null => VcfUtils.getSampleIds(inputVCF).map(x => x -> Map[String, Any]()).toMap + case s: SampleLibraryTag => s.sampleId.map(x => x -> Map[String, Any]()).toMap + case _ => throw new IllegalArgumentException("") + } + def init(): Unit = { inputFiles :+= new InputFile(inputVCF) - sampleIds = root match { - case m: MultiSampleQScript => m.samples.keys.toList - case null => VcfUtils.getSampleIds(inputVCF) - case s: SampleLibraryTag => s.sampleId.toList - case _ => throw new IllegalArgumentException("You don't have any samples") - } } override def defaults = Map( @@ -79,29 +81,29 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum val gonlVcfFile: Option[File] = config("gonl_vcf") val exacVcfFile: Option[File] = config("exac_vcf") - var outputFile = normalizer.outputVcf + outputVcf = Some(normalizer.outputVcf) gonlVcfFile match { case Some(gonlFile) => val vcfWithVcf = new VcfWithVcf(this) - vcfWithVcf.input = outputFile + vcfWithVcf.input = outputVcf.getOrElse(new File("")) vcfWithVcf.secondaryVcf = gonlFile vcfWithVcf.output = swapExt(outputDir, normalizer.outputVcf, ".vcf.gz", ".gonl.vcf.gz") vcfWithVcf.fields ::= ("AF", "AF_gonl", None) add(vcfWithVcf) - outputFile = vcfWithVcf.output + outputVcf = Some(vcfWithVcf.output) case _ => } exacVcfFile match { case Some(exacFile) => val vcfWithVcf = new VcfWithVcf(this) - 
vcfWithVcf.input = outputFile + vcfWithVcf.input = outputVcf.getOrElse(new File("")) vcfWithVcf.secondaryVcf = exacFile - vcfWithVcf.output = swapExt(outputDir, outputFile, ".vcf.gz", ".exac.vcf.gz") + vcfWithVcf.output = swapExt(outputDir, outputVcf.getOrElse(new File("")), ".vcf.gz", ".exac.vcf.gz") vcfWithVcf.fields ::= ("AF", "AF_exac", None) add(vcfWithVcf) - outputFile = vcfWithVcf.output + outputVcf = Some(vcfWithVcf.output) case _ => } @@ -116,7 +118,7 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum * @param annotation: ManweDownloadAnnotateVcf object of annotated vcf * @return */ - def importAndActivateSample(sampleID: String, inputVcf: File, + def importAndActivateSample(sampleID: String, sampleGroups: List[String], inputVcf: File, gVCF: File, annotation: ManweAnnotateVcf): ManweActivateAfterAnnotImport = { val minGQ: Int = config("minimum_genome_quality", default = 20, namespace = "manwe") @@ -165,6 +167,7 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum imported.beds = List(bgzippedBed.output) imported.name = Some(sampleID) imported.public = isPublic + imported.group = sampleGroups imported.waitToComplete = false imported.isIntermediate = true imported.output = swapExt(outputDir, intersected.output, ".vcf.gz", ".manwe.import") @@ -186,7 +189,6 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum def varda(vcf: File, gVcf: File): File = { val annotationQueries: List[String] = config("annotation_queries", default = List("GLOBAL *"), namespace = "manwe") - //TODO: add groups!!! 
Need sample-specific group tags for this val annotate = new ManweAnnotateVcf(this) annotate.vcf = vcf @@ -202,7 +204,14 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum annotatedVcf.output = swapExt(outputDir, annotate.output, ".manwe.annot", "manwe.annot.vcf.gz") add(annotatedVcf) - val activates = sampleIds map { x => importAndActivateSample(x, vcf, gVcf, annotate) } + val activates = sampleInfo map { x => + val sampleGroup = x._2.getOrElse("varda_group", Nil) match { + case x: List[String] => x + case Nil => Nil + case _ => throw new IllegalArgumentException("Sample tag 'varda_group' is not a list of strings") + } + importAndActivateSample(x._1, sampleGroup, vcf, gVcf, annotate) + } val finalLn = new Ln(this) activates.foreach(x => finalLn.deps :+= x.output)