Commit 8ce1b37a authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Merge branch 'develop' of git.lumc.nl:biopet/biopet into feature-seqstat-scala

parents d93b8076 a92d12f8
......@@ -22,6 +22,7 @@
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
<outputRelativeToContentRoot value="true" />
<processorPath useClasspath="true" />
<module name="Bam2Wig" />
<module name="BamMetrics" />
<module name="Basty" />
<module name="BiopetFramework" />
......
......@@ -8,6 +8,7 @@
<file url="file://$PROJECT_DIR$/protected/biopet-gatk-pipelines" charset="UTF-8" />
<file url="file://$PROJECT_DIR$/protected/biopet-protected-package" charset="UTF-8" />
<file url="file://$PROJECT_DIR$/public" charset="UTF-8" />
<file url="file://$PROJECT_DIR$/public/bam2wig" charset="UTF-8" />
<file url="file://$PROJECT_DIR$/public/bammetrics" charset="UTF-8" />
<file url="file://$PROJECT_DIR$/public/biopet-framework" charset="UTF-8" />
<file url="file://$PROJECT_DIR$/public/biopet-public-package" charset="UTF-8" />
......
<component name="libraryTable">
<library name="Maven: nl.lumc.sasc:Bam2Wig:0.3.0-DEV">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/nl/lumc/sasc/Bam2Wig/0.3.0-DEV/Bam2Wig-0.3.0-DEV.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/nl/lumc/sasc/Bam2Wig/0.3.0-DEV/Bam2Wig-0.3.0-DEV-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/nl/lumc/sasc/Bam2Wig/0.3.0-DEV/Bam2Wig-0.3.0-DEV-sources.jar!/" />
</SOURCES>
</library>
</component>
\ No newline at end of file
......@@ -2,6 +2,7 @@
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/public/bam2wig/Bam2Wig.iml" filepath="$PROJECT_DIR$/public/bam2wig/Bam2Wig.iml" />
<module fileurl="file://$PROJECT_DIR$/public/bammetrics/BamMetrics.iml" filepath="$PROJECT_DIR$/public/bammetrics/BamMetrics.iml" />
<module fileurl="file://$PROJECT_DIR$/protected/basty/Basty.iml" filepath="$PROJECT_DIR$/protected/basty/Basty.iml" />
<module fileurl="file://$PROJECT_DIR$/public/Biopet.iml" filepath="$PROJECT_DIR$/public/Biopet.iml" />
......
......@@ -20,6 +20,8 @@
<parameter value="$PROJECT_DIR$/public/flexiprep/target/.scala_dependencies" />
<parameter value="$PROJECT_DIR$/public/mapping/target/.scala_dependencies" />
<parameter value="$PROJECT_DIR$/protected/biopet-gatk-extensions/target/.scala_dependencies" />
<parameter value="$PROJECT_DIR$/public/bamtobigwig/target/.scala_dependencies" />
<parameter value="$PROJECT_DIR$/public/bam2wig/target/.scala_dependencies" />
</parameters>
</component>
</project>
\ No newline at end of file
......@@ -72,16 +72,16 @@ Global setting examples are:
#### Example settings config
~~~
{
"reference": "/data/LGTC/projects/vandoorn-melanoma/data/references/hg19_nohap/ucsc.hg19_nohap.fasta",
"dbsnp": "/data/LGTC/projects/vandoorn-melanoma/data/references/hg19_nohap/dbsnp_137.hg19_nohap.vcf",
"reference": "/references/hg19_nohap/ucsc.hg19_nohap.fasta",
"dbsnp": "/references/hg19_nohap/dbsnp_137.hg19_nohap.vcf",
"joint_variantcalling": false,
"haplotypecaller": { "scattercount": 100 },
"multisample": { "haplotypecaller": { "scattercount": 1000 } },
"picard": { "validationstringency": "LENIENT" },
"library_variantcalling_temp": true,
"target_bed_temp": "/data/LGTC/projects/vandoorn-melanoma/analysis/target.bed",
"target_bed_temp": "analysis/target.bed",
"min_dp": 5,
"bedtools": {"exe":"/share/isilon/system/local/BEDtools/bedtools-2.17.0/bin/bedtools"},
"bedtools": {"exe":"/BEDtools/bedtools-2.17.0/bin/bedtools"},
"bam_to_fastq": true,
"baserecalibrator": { "memory_limit": 8, "vmem":"16G" },
"samtofastq": {"memory_limit": 8, "vmem": "16G"},
......@@ -95,4 +95,4 @@ Global setting examples are:
### JSON validation
To check whether the created JSON file is correct, there are multiple options; the simplest is to use [this](http://jsonformatter.curiousconcept.com/)
website. It is also possible to use Python or Scala for validating but this requires some more knowledge.
\ No newline at end of file
website. It is also possible to use Python or Scala for validating but this requires some more knowledge.
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster which is running
# SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
# should also be able to execute Biopet tools and pipelines.
#
# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project that are
# not part of GATK Queue is freely available for non-commercial use under an AGPL
# license; For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
# Set root logger level to ERROR and its only appender to A1.
log4j.rootLogger=ERROR, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n
\ No newline at end of file
......@@ -93,5 +93,6 @@
<orderEntry type="module" module-name="Mapping" />
<orderEntry type="module" module-name="Flexiprep" />
<orderEntry type="module" module-name="BamMetrics" />
<orderEntry type="module" module-name="Bam2Wig" />
</component>
</module>
\ No newline at end of file
......@@ -33,7 +33,7 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
def makeSample(id: String) = new Sample(id)
class Sample(sampleId: String) extends AbstractSample(sampleId) {
def makeLibrary(id: String) = new Library(id)
class Library(libraryId: String) extends AbstractLibrary(libraryId) {
class Library(libId: String) extends AbstractLibrary(libId) {
protected def addJobs(): Unit = {}
}
......@@ -60,28 +60,34 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
}
def addMultiSampleJobs(): Unit = {
val refVariants = addGenerateFasta(null, outputDir + "reference/", outputName = "reference")
val refVariantSnps = addGenerateFasta(null, outputDir + "reference/", outputName = "reference", snpsOnly = true)
val refVariants = addGenerateFasta(null, new File(outputDir, "reference"), outputName = "reference")
val refVariantSnps = addGenerateFasta(null, new File(outputDir, "reference"), outputName = "reference", snpsOnly = true)
val catVariants = Cat(this, refVariants.variants :: samples.map(_._2.output.variants).toList, outputDir + "fastas/variant.fasta")
val catVariants = Cat(this, refVariants.variants :: samples.map(_._2.output.variants).toList,
new File(outputDir, "fastas" + File.separator + "variant.fasta"))
add(catVariants)
val catVariantsSnps = Cat(this, refVariantSnps.variants :: samples.map(_._2.outputSnps.variants).toList, outputDir + "fastas/variant.snps_only.fasta")
val catVariantsSnps = Cat(this, refVariantSnps.variants :: samples.map(_._2.outputSnps.variants).toList,
new File(outputDir, "fastas" + File.separator + "variant.snps_only.fasta"))
add(catVariantsSnps)
val catConsensus = Cat(this, refVariants.consensus :: samples.map(_._2.output.consensus).toList, outputDir + "fastas/consensus.fasta")
val catConsensus = Cat(this, refVariants.consensus :: samples.map(_._2.output.consensus).toList,
new File(outputDir, "fastas" + File.separator + "consensus.fasta"))
add(catConsensus)
val catConsensusSnps = Cat(this, refVariantSnps.consensus :: samples.map(_._2.outputSnps.consensus).toList, outputDir + "fastas/consensus.snps_only.fasta")
val catConsensusSnps = Cat(this, refVariantSnps.consensus :: samples.map(_._2.outputSnps.consensus).toList,
new File(outputDir, "fastas" + File.separator + "consensus.snps_only.fasta"))
add(catConsensusSnps)
val catConsensusVariants = Cat(this, refVariants.consensusVariants :: samples.map(_._2.output.consensusVariants).toList, outputDir + "fastas/consensus.variant.fasta")
val catConsensusVariants = Cat(this, refVariants.consensusVariants :: samples.map(_._2.output.consensusVariants).toList,
new File(outputDir, "fastas" + File.separator + "consensus.variant.fasta"))
add(catConsensusVariants)
val catConsensusVariantsSnps = Cat(this, refVariantSnps.consensusVariants :: samples.map(_._2.outputSnps.consensusVariants).toList, outputDir + "fastas/consensus.variant.snps_only.fasta")
val catConsensusVariantsSnps = Cat(this, refVariantSnps.consensusVariants :: samples.map(_._2.outputSnps.consensusVariants).toList,
new File(outputDir, "fastas" + File.separator + "consensus.variant.snps_only.fasta"))
add(catConsensusVariantsSnps)
val seed: Int = config("seed", default = 12345)
def addTreeJobs(variants: File, concensusVariants: File, outputDir: String, outputName: String) {
val dirSufixRaxml = if (outputDir.endsWith(File.separator)) "raxml" else File.separator + "raxml"
val dirSufixGubbins = if (outputDir.endsWith(File.separator)) "gubbins" else File.separator + "gubbins"
def addTreeJobs(variants: File, concensusVariants: File, outputDir: File, outputName: String) {
val dirSufixRaxml = new File(outputDir, "raxml")
val dirSufixGubbins = new File(outputDir, "gubbins")
val raxmlMl = new Raxml(this)
raxmlMl.input = variants
......@@ -101,7 +107,7 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX")
raxmlBoot.p = seed
raxmlBoot.b = math.abs(r.nextInt)
raxmlBoot.w = outputDir + dirSufixRaxml
raxmlBoot.w = dirSufixRaxml
raxmlBoot.N = 1
raxmlBoot.n = outputName + "_boot_" + t
add(raxmlBoot)
......@@ -124,17 +130,19 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
val gubbins = new RunGubbins(this)
gubbins.fastafile = concensusVariants
gubbins.startingTree = raxmlBi.getBipartitionsFile
gubbins.outputDirectory = outputDir + dirSufixGubbins
gubbins.startingTree = Some(raxmlBi.getBipartitionsFile)
gubbins.outputDirectory = dirSufixGubbins
add(gubbins)
}
addTreeJobs(catVariantsSnps.output, catConsensusVariantsSnps.output, outputDir + "trees" + File.separator + "snps_only", "snps_only")
addTreeJobs(catVariants.output, catConsensusVariants.output, outputDir + "trees" + File.separator + "snps_indels", "snps_indels")
addTreeJobs(catVariantsSnps.output, catConsensusVariantsSnps.output,
new File(outputDir, "trees" + File.separator + "snps_only"), "snps_only")
addTreeJobs(catVariants.output, catConsensusVariants.output,
new File(outputDir, "trees" + File.separator + "snps_indels"), "snps_indels")
}
def addGenerateFasta(sampleName: String, outputDir: String, outputName: String = null,
def addGenerateFasta(sampleName: String, outputDir: File, outputName: String = null,
snpsOnly: Boolean = false): FastaOutput = {
val bastyGenerateFasta = new BastyGenerateFasta(this)
bastyGenerateFasta.outputName = if (outputName != null) outputName else sampleName
......
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster which is running
# SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
# should also be able to execute Biopet tools and pipelines.
#
# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project that are
# not part of GATK Queue is freely available for non-commercial use under an AGPL
# license; For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
# Set root logger level to ERROR and its only appender to A1.
log4j.rootLogger=ERROR, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n
\ No newline at end of file
......@@ -11,8 +11,8 @@ import nl.lumc.sasc.biopet.core.config.Configurable
class ApplyRecalibration(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.ApplyRecalibration with GatkGeneral {
scatterCount = config("scattercount", default = 0)
override def afterGraph {
super.afterGraph
override def beforeGraph {
super.beforeGraph
nt = Option(getThreads(3))
memoryLimit = Option(nt.getOrElse(1) * 2)
......
......@@ -12,7 +12,7 @@ class BaseRecalibrator(val root: Configurable) extends org.broadinstitute.gatk.q
memoryLimit = Option(4)
override val defaultVmem = "8G"
if (config.contains("scattercount")) scatterCount = config("scattercount")
if (config.contains("scattercount")) scatterCount = config("scattercount", default = 1)
if (config.contains("dbsnp")) knownSites :+= new File(config("dbsnp").asString)
if (config.contains("known_sites")) knownSites :+= new File(config("known_sites").asString)
}
......@@ -22,7 +22,7 @@ object BaseRecalibrator {
val br = new BaseRecalibrator(root)
br.input_file :+= input
br.out = output
br.afterGraph
br.beforeGraph
return br
}
}
\ No newline at end of file
......@@ -13,13 +13,13 @@ trait GatkGeneral extends CommandLineGATK with BiopetJavaCommandLineFunction {
override def subPath = "gatk" :: super.subPath
jarFile = config("gatk_jar", required = true)
jarFile = config("gatk_jar")
override val defaultVmem = "7G"
if (config.contains("intervals")) intervals = config("intervals").asFileList
if (config.contains("exclude_intervals")) excludeIntervals = config("exclude_intervals").asFileList
reference_sequence = config("reference")
gatk_key = config("gatk_key")
if (config.contains("gatk_key")) gatk_key = config("gatk_key")
if (config.contains("pedigree")) pedigree = config("pedigree").asFileList
}
......@@ -9,40 +9,40 @@ import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType
class HaplotypeCaller(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.HaplotypeCaller with GatkGeneral {
override def afterGraph {
super.afterGraph
min_mapping_quality_score = config("minMappingQualityScore", default = 20)
if (config.contains("scattercount")) scatterCount = config("scattercount")
if (config.contains("dbsnp")) this.dbsnp = config("dbsnp")
this.sample_ploidy = config("ploidy")
nct = config("threads", default = 1)
bamOutput = config("bamOutput")
memoryLimit = Option(nct.getOrElse(1) * 2)
if (config.contains("allSitePLs")) this.allSitePLs = config("allSitePLs")
if (config.contains("output_mode")) {
import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode._
config("output_mode").asString match {
case "EMIT_ALL_CONFIDENT_SITES" => output_mode = EMIT_ALL_CONFIDENT_SITES
case "EMIT_ALL_SITES" => output_mode = EMIT_ALL_SITES
case "EMIT_VARIANTS_ONLY" => output_mode = EMIT_VARIANTS_ONLY
case e => logger.warn("output mode '" + e + "' does not exist")
}
min_mapping_quality_score = config("minMappingQualityScore", default = 20)
scatterCount = config("scattercount", default = 1)
if (config.contains("dbsnp")) this.dbsnp = config("dbsnp")
this.sample_ploidy = config("ploidy")
if (config.contains("bamOutput")) bamOutput = config("bamOutput")
if (config.contains("allSitePLs")) allSitePLs = config("allSitePLs")
if (config.contains("output_mode")) {
import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode._
config("output_mode").asString match {
case "EMIT_ALL_CONFIDENT_SITES" => output_mode = EMIT_ALL_CONFIDENT_SITES
case "EMIT_ALL_SITES" => output_mode = EMIT_ALL_SITES
case "EMIT_VARIANTS_ONLY" => output_mode = EMIT_VARIANTS_ONLY
case e => logger.warn("output mode '" + e + "' does not exist")
}
}
if (config("inputtype", default = "dna").asString == "rna") {
dontUseSoftClippedBases = config("dontusesoftclippedbases", default = true)
stand_call_conf = config("stand_call_conf", default = 5)
stand_emit_conf = config("stand_emit_conf", default = 0)
} else {
dontUseSoftClippedBases = config("dontusesoftclippedbases", default = false)
stand_call_conf = config("stand_call_conf", default = 5)
stand_emit_conf = config("stand_emit_conf", default = 0)
}
if (config("inputtype", default = "dna").asString == "rna") {
dontUseSoftClippedBases = config("dontusesoftclippedbases", default = true)
stand_call_conf = config("stand_call_conf", default = 5)
stand_emit_conf = config("stand_emit_conf", default = 0)
} else {
dontUseSoftClippedBases = config("dontusesoftclippedbases", default = false)
stand_call_conf = config("stand_call_conf", default = 5)
stand_emit_conf = config("stand_emit_conf", default = 0)
}
override def beforeGraph {
super.beforeGraph
if (bamOutput != null && nct.getOrElse(1) > 1) {
nct = Option(1)
threads = 1
logger.warn("BamOutput is on, nct/threads is forced to set on 1, this option is only for debug")
}
nct = Some(getThreads(1))
memoryLimit = Option(memoryLimit.getOrElse(2.0) * nct.getOrElse(1))
}
def useGvcf() {
......
......@@ -13,7 +13,7 @@ class IndelRealigner(val root: Configurable) extends org.broadinstitute.gatk.que
}
object IndelRealigner {
def apply(root: Configurable, input: File, targetIntervals: File, outputDir: String): IndelRealigner = {
def apply(root: Configurable, input: File, targetIntervals: File, outputDir: File): IndelRealigner = {
val ir = new IndelRealigner(root)
ir.input_file :+= input
ir.targetIntervals = targetIntervals
......
......@@ -18,7 +18,7 @@ class RealignerTargetCreator(val root: Configurable) extends org.broadinstitute.
}
object RealignerTargetCreator {
def apply(root: Configurable, input: File, outputDir: String): RealignerTargetCreator = {
def apply(root: Configurable, input: File, outputDir: File): RealignerTargetCreator = {
val re = new RealignerTargetCreator(root)
re.input_file :+= input
re.out = new File(outputDir, input.getName.stripSuffix(".bam") + ".realign.intervals")
......
......@@ -8,14 +8,14 @@ package nl.lumc.sasc.biopet.extensions.gatk
import nl.lumc.sasc.biopet.core.config.Configurable
class UnifiedGenotyper(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.UnifiedGenotyper with GatkGeneral {
override def afterGraph {
super.afterGraph
override def beforeGraph {
super.beforeGraph
genotype_likelihoods_model = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.BOTH
if (config.contains("scattercount")) scatterCount = config("scattercount")
if (config.contains("dbsnp")) this.dbsnp = config("dbsnp")
this.sample_ploidy = config("ploidy")
nct = config("threads", default = 1)
nct = Some(getThreads(1))
memoryLimit = Option(nct.getOrElse(1) * 2)
if (config.contains("allSitePLs")) this.allSitePLs = config("allSitePLs")
if (config.contains("output_mode")) {
......
......@@ -9,8 +9,8 @@ import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
class VariantEval(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.VariantEval with GatkGeneral {
override def afterGraph {
super.afterGraph
override def beforeGraph {
super.beforeGraph
}
}
......@@ -21,7 +21,7 @@ object VariantEval {
vareval.eval = Seq(sample)
vareval.comp = Seq(compareWith)
vareval.out = output
vareval.afterGraph
vareval.beforeGraph
return vareval
}
......@@ -35,7 +35,7 @@ object VariantEval {
vareval.ST = ST
vareval.noEV = true
vareval.EV = EV
vareval.afterGraph
vareval.beforeGraph
return vareval
}
......
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster which is running
# SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
# should also be able to execute Biopet tools and pipelines.
#
# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project that are
# not part of GATK Queue is freely available for non-commercial use under an AGPL
# license; For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
# Set root logger level to ERROR and its only appender to A1.
log4j.rootLogger=ERROR, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n
\ No newline at end of file
......@@ -92,5 +92,6 @@
<orderEntry type="module" module-name="Mapping" />
<orderEntry type="module" module-name="Flexiprep" />
<orderEntry type="module" module-name="BamMetrics" />
<orderEntry type="module" module-name="Bam2Wig" />
</component>
</module>
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment