diff --git a/.idea/compiler.xml b/.idea/compiler.xml index 94588dbce1d488f6bcec18cad43bd6126e07cfe2..ca2890f7c77b6477d2626faf502aad195111564d 100644 --- a/.idea/compiler.xml +++ b/.idea/compiler.xml @@ -22,6 +22,7 @@ <sourceTestOutputDir name="target/generated-test-sources/test-annotations" /> <outputRelativeToContentRoot value="true" /> <processorPath useClasspath="true" /> + <module name="Bam2Wig" /> <module name="BamMetrics" /> <module name="Basty" /> <module name="BiopetFramework" /> diff --git a/.idea/encodings.xml b/.idea/encodings.xml index 308908157d0e7e0ec082d7dd82bba600ca468058..ee48a25fadabcef0b7349733c8f436f4ace537c2 100644 --- a/.idea/encodings.xml +++ b/.idea/encodings.xml @@ -8,6 +8,7 @@ <file url="file://$PROJECT_DIR$/protected/biopet-gatk-pipelines" charset="UTF-8" /> <file url="file://$PROJECT_DIR$/protected/biopet-protected-package" charset="UTF-8" /> <file url="file://$PROJECT_DIR$/public" charset="UTF-8" /> + <file url="file://$PROJECT_DIR$/public/bam2wig" charset="UTF-8" /> <file url="file://$PROJECT_DIR$/public/bammetrics" charset="UTF-8" /> <file url="file://$PROJECT_DIR$/public/biopet-framework" charset="UTF-8" /> <file url="file://$PROJECT_DIR$/public/biopet-public-package" charset="UTF-8" /> diff --git a/.idea/libraries/Maven__nl_lumc_sasc_Bam2Wig_0_3_0_DEV.xml b/.idea/libraries/Maven__nl_lumc_sasc_Bam2Wig_0_3_0_DEV.xml new file mode 100644 index 0000000000000000000000000000000000000000..f5ee6e36df6b26e1f52ef7696a43c9e7e3bd38f9 --- /dev/null +++ b/.idea/libraries/Maven__nl_lumc_sasc_Bam2Wig_0_3_0_DEV.xml @@ -0,0 +1,13 @@ +<component name="libraryTable"> + <library name="Maven: nl.lumc.sasc:Bam2Wig:0.3.0-DEV"> + <CLASSES> + <root url="jar://$MAVEN_REPOSITORY$/nl/lumc/sasc/Bam2Wig/0.3.0-DEV/Bam2Wig-0.3.0-DEV.jar!/" /> + </CLASSES> + <JAVADOC> + <root url="jar://$MAVEN_REPOSITORY$/nl/lumc/sasc/Bam2Wig/0.3.0-DEV/Bam2Wig-0.3.0-DEV-javadoc.jar!/" /> + </JAVADOC> + <SOURCES> + <root url="jar://$MAVEN_REPOSITORY$/nl/lumc/sasc/Bam2Wig/0.3.0-DEV/Bam2Wig-0.3.0-DEV-sources.jar!/" /> + </SOURCES> + </library> +</component> \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml index 7df432bc09c603fd65de3e993ac0b4a44a96aadb..02f8f99d7c62a9f26645050d2d69967894ed6528 100644 --- a/.idea/modules.xml +++ b/.idea/modules.xml @@ -2,6 +2,7 @@ <project version="4"> <component name="ProjectModuleManager"> <modules> + <module fileurl="file://$PROJECT_DIR$/public/bam2wig/Bam2Wig.iml" filepath="$PROJECT_DIR$/public/bam2wig/Bam2Wig.iml" /> <module fileurl="file://$PROJECT_DIR$/public/bammetrics/BamMetrics.iml" filepath="$PROJECT_DIR$/public/bammetrics/BamMetrics.iml" /> <module fileurl="file://$PROJECT_DIR$/protected/basty/Basty.iml" filepath="$PROJECT_DIR$/protected/basty/Basty.iml" /> <module fileurl="file://$PROJECT_DIR$/public/Biopet.iml" filepath="$PROJECT_DIR$/public/Biopet.iml" /> diff --git a/.idea/scala_compiler.xml b/.idea/scala_compiler.xml index 7fb11a07f6f1f6af473bbc77dc7702f2ab2d6a28..4a665f22502d17ad071dee0ea69c955613022acd 100644 --- a/.idea/scala_compiler.xml +++ b/.idea/scala_compiler.xml @@ -20,6 +20,8 @@ <parameter value="$PROJECT_DIR$/public/flexiprep/target/.scala_dependencies" /> <parameter value="$PROJECT_DIR$/public/mapping/target/.scala_dependencies" /> <parameter value="$PROJECT_DIR$/protected/biopet-gatk-extensions/target/.scala_dependencies" /> + <parameter value="$PROJECT_DIR$/public/bamtobigwig/target/.scala_dependencies" /> + <parameter value="$PROJECT_DIR$/public/bam2wig/target/.scala_dependencies" /> </parameters> </component> </project> \ No newline at end of file diff --git a/protected/basty/Basty.iml b/protected/basty/Basty.iml index 4b3569203de42d51dbb731294b2f1f823e9f1d34..8c135e13d0bded344f4b94b85ac82fddeda18d51 100644 --- a/protected/basty/Basty.iml +++ b/protected/basty/Basty.iml @@ -93,5 +93,6 @@ <orderEntry type="module" module-name="Mapping" /> <orderEntry type="module" module-name="Flexiprep" /> <orderEntry type="module" module-name="BamMetrics" /> + <orderEntry type="module" module-name="Bam2Wig" /> </component> </module> \ No newline at end of file diff --git a/protected/biopet-gatk-pipelines/BiopetGatkPipelines.iml b/protected/biopet-gatk-pipelines/BiopetGatkPipelines.iml index ed47f5705ec48e31e84fc41e3674efa22b566943..5ef14a466a1c37f9d40255694cc02b069bb3be46 100644 --- a/protected/biopet-gatk-pipelines/BiopetGatkPipelines.iml +++ b/protected/biopet-gatk-pipelines/BiopetGatkPipelines.iml @@ -92,5 +92,6 @@ <orderEntry type="module" module-name="Mapping" /> <orderEntry type="module" module-name="Flexiprep" /> <orderEntry type="module" module-name="BamMetrics" /> + <orderEntry type="module" module-name="Bam2Wig" /> </component> </module> \ No newline at end of file diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala index be4566211351a3d077877d1a99cf8be6ba0f58cd..5eb657a517fe566a99728d25527ff042a232693c 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala @@ -9,6 +9,7 @@ import nl.lumc.sasc.biopet.core.MultiSampleQScript import nl.lumc.sasc.biopet.core.PipelineCommand import nl.lumc.sasc.biopet.core.config.Configurable import htsjdk.samtools.SamReaderFactory +import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig import scala.collection.JavaConversions._ import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, CombineGVCFs } import nl.lumc.sasc.biopet.extensions.picard.AddOrReplaceReadGroups @@ -50,6 +51,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri /** Library variantcalling */ val gatkVariantcalling = new GatkVariantcalling(qscript) + gatkVariantcalling.doublePreProces = false gatkVariantcalling.sampleID = sampleId gatkVariantcalling.outputDir = libDir @@ -112,7 +114,6 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri if (bamFile.isDefined) { gatkVariantcalling.inputBams = List(bamFile.get) gatkVariantcalling.variantcalling = config("library_variantcalling", default = false) - gatkVariantcalling.preProcesBams = true gatkVariantcalling.init gatkVariantcalling.biopetScript addAll(gatkVariantcalling.functions) @@ -136,6 +137,8 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri gatkVariantcalling.init gatkVariantcalling.biopetScript addAll(gatkVariantcalling.functions) + + gatkVariantcalling.inputBams.foreach(x => addAll(Bam2Wig(qscript, x).functions)) } } diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala index 47715e26adb11f0ed73520d2769ca55120a58d93..5d2d1de57127fd85f5afa21eb92becd8a7454b2d 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala @@ -7,6 +7,7 @@ package nl.lumc.sasc.biopet.pipelines.gatk import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } import java.io.File +import nl.lumc.sasc.biopet.extensions.Ln import nl.lumc.sasc.biopet.tools.{ VcfStats, MpileupToVcf, VcfFilter, MergeAlleles } import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.extensions.gatk.{ AnalyzeCovariates, BaseRecalibrator, GenotypeGVCFs, HaplotypeCaller, IndelRealigner, PrintReads, RealignerTargetCreator, SelectVariants, CombineVariants, UnifiedGenotyper } @@ -38,12 +39,12 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr @Argument(doc = "Sample name", required = false) var sampleID: String = _ - var preProcesBams: Option[Boolean] = config("pre_proces_bams", default = true) + var preProcesBams: Boolean = config("pre_proces_bams", default = true) var variantcalling: Boolean = true - var doublePreProces: Option[Boolean] = config("double_pre_proces", default = true) - var useHaplotypecaller: Option[Boolean] = config("use_haplotypecaller", default = true) - var useUnifiedGenotyper: Option[Boolean] = config("use_unifiedgenotyper", default = false) - var useAllelesOption: Option[Boolean] = config("use_alleles_option", default = false) + var doublePreProces: Boolean = config("double_pre_proces", default = true) + var useHaplotypecaller: Boolean = config("use_haplotypecaller", default = true) + var useUnifiedGenotyper: Boolean = config("use_unifiedgenotyper", default = false) + var useAllelesOption: Boolean = config("use_alleles_option", default = false) var useMpileup: Boolean = config("use_mpileup", default = true) var useIndelRealigner: Boolean = config("use_indel_realign", default = true) var useBaseRecalibration: Boolean = config("use_base_recalibration", default = true) @@ -62,42 +63,53 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr } private def doublePreProces(files: List[File]): List[File] = { - if (files.size == 1) return files if (files.isEmpty) throw new IllegalStateException("Files can't be empty") - if (!doublePreProces.get) return files - val markDup = MarkDuplicates(this, files, new File(outputDir + outputName + ".dedup.bam")) - markDup.isIntermediate = useIndelRealigner - add(markDup) - if (useIndelRealigner) { - List(addIndelRealign(markDup.output, outputDir, isIntermediate = false)) + else if (!doublePreProces) files + else if (files.size == 1) { + val bamFile: File = outputDir + files.head.getName + if (bamFile != files.head) { + val oldIndex: File = files.head.getAbsolutePath.stripSuffix(".bam") + ".bai" + val newIndex: File = bamFile.getAbsolutePath.stripSuffix(".bam") + ".bai" + add(Ln(this, oldIndex, newIndex)) + + val bamLn = Ln(this, files.head, bamFile) + bamLn.deps :+= newIndex + add(bamLn) + } + List(bamFile) } else { - List(markDup.output) + val markDup = MarkDuplicates(this, files, new File(outputDir + outputName + ".dedup.bam")) + markDup.isIntermediate = useIndelRealigner + add(markDup) + if (useIndelRealigner) { + List(addIndelRealign(markDup.output, outputDir, isIntermediate = false)) + } else { + List(markDup.output) + } } } def biopetScript() { - scriptOutput.bamFiles = if (preProcesBams.get) { - var bamFiles: List[File] = Nil - for (inputBam <- inputBams) { - var bamFile = inputBam - if (useIndelRealigner) { - bamFile = addIndelRealign(bamFile, outputDir, isIntermediate = useBaseRecalibration) - } - if (useBaseRecalibration) { - bamFile = addBaseRecalibrator(bamFile, outputDir, isIntermediate = bamFiles.size > 1) + scriptOutput.bamFiles = { + doublePreProces(if (preProcesBams) { + for (inputBam <- inputBams) yield { + var bamFile = inputBam + if (useIndelRealigner) + bamFile = addIndelRealign(bamFile, outputDir, isIntermediate = useBaseRecalibration) + if (useBaseRecalibration) + bamFile = addBaseRecalibrator(bamFile, outputDir, isIntermediate = inputBams.size > 1) + bamFile } - bamFiles :+= bamFile - } - doublePreProces(bamFiles) - } else if (inputBams.size > 1 && doublePreProces.get) { - doublePreProces(inputBams) - } else inputBams + } else { + inputBams + }) + } if (variantcalling) { var mergBuffer: SortedMap[String, File] = SortedMap() def mergeList = mergBuffer map { case (key, file) => TaggedFile(removeNoneVariants(file), "name=" + key) } - if (sampleID != null && (useHaplotypecaller.get || config("joint_genotyping", default = false).asBoolean)) { + if (sampleID != null && (useHaplotypecaller || config("joint_genotyping", default = false).asBoolean)) { val hcGvcf = new HaplotypeCaller(this) hcGvcf.useGvcf hcGvcf.input_file = scriptOutput.bamFiles @@ -106,7 +118,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr scriptOutput.gvcfFile = hcGvcf.out } - if (useHaplotypecaller.get) { + if (useHaplotypecaller) { if (sampleID != null) { val genotypeGVCFs = GenotypeGVCFs(this, List(scriptOutput.gvcfFile), outputDir + outputName + ".hc.discovery.vcf.gz") add(genotypeGVCFs) @@ -121,7 +133,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr mergBuffer += ("1.HC-Discovery" -> scriptOutput.hcVcfFile) } - if (useUnifiedGenotyper.get) { + if (useUnifiedGenotyper) { val ugVcf = new UnifiedGenotyper(this) ugVcf.input_file = scriptOutput.bamFiles ugVcf.out = outputDir + outputName + ".ug.discovery.vcf.gz" @@ -156,12 +168,12 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr } // Allele mode - if (useAllelesOption.get) { + if (useAllelesOption) { val mergeAlleles = MergeAlleles(this, mergeList.toList, outputDir + "raw.allele__temp_only.vcf.gz") mergeAlleles.isIntermediate = true add(mergeAlleles) - if (useHaplotypecaller.get) { + if (useHaplotypecaller) { val hcAlleles = new HaplotypeCaller(this) hcAlleles.input_file = scriptOutput.bamFiles hcAlleles.out = outputDir + outputName + ".hc.allele.vcf.gz" @@ -172,7 +184,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr mergBuffer += ("3.HC-alleles" -> hcAlleles.out) } - if (useUnifiedGenotyper.get) { + if (useUnifiedGenotyper) { val ugAlleles = new UnifiedGenotyper(this) ugAlleles.input_file = scriptOutput.bamFiles ugAlleles.out = outputDir + outputName + ".ug.allele.vcf.gz" diff --git a/protected/biopet-protected-package/BiopetProtectedPackage.iml b/protected/biopet-protected-package/BiopetProtectedPackage.iml index 65380feeb14e5c5f1670690a903ca6b912e7742b..8defbb28bdf502bfd5878fce8ad70b3e98742838 100644 --- a/protected/biopet-protected-package/BiopetProtectedPackage.iml +++ b/protected/biopet-protected-package/BiopetProtectedPackage.iml @@ -95,6 +95,7 @@ <orderEntry type="module" module-name="Gentrap" /> <orderEntry type="module" module-name="Sage" /> <orderEntry type="module" module-name="Yamsvp" /> + <orderEntry type="module" module-name="Bam2Wig" /> <orderEntry type="module" module-name="BiopetGatkPipelines" /> <orderEntry type="module" module-name="BiopetGatkExtensions" /> <orderEntry type="module" module-name="Basty" /> diff --git a/public/bam2wig/Bam2Wig.iml b/public/bam2wig/Bam2Wig.iml new file mode 100644 index 0000000000000000000000000000000000000000..54fb966499dc08774dd77641ac8d9286ecddbc96 --- /dev/null +++ b/public/bam2wig/Bam2Wig.iml @@ -0,0 +1,92 @@ +<?xml version="1.0" encoding="UTF-8"?> +<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4"> + <component name="NewModuleRootManager" inherit-compiler-output="false"> + <output url="file://$MODULE_DIR$/target/classes" /> + <output-test url="file://$MODULE_DIR$/target/test-classes" /> + <content url="file://$MODULE_DIR$"> + <sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" /> + <sourceFolder url="file://$MODULE_DIR$/src/main/scala" isTestSource="false" /> + <excludeFolder url="file://$MODULE_DIR$/target" /> + </content> + <orderEntry type="inheritedJdk" /> + <orderEntry type="sourceFolder" forTests="false" /> + <orderEntry type="module" module-name="BiopetFramework" /> + <orderEntry type="library" name="Maven: org.scala-lang:scala-library:2.11.2" level="project" /> + <orderEntry type="library" name="Maven: org.broadinstitute.gatk:gatk-queue:3.3" level="project" /> + <orderEntry type="library" name="Maven: org.broadinstitute.gatk:gatk-tools-public:3.3" level="project" /> + <orderEntry type="library" name="Maven: org.broadinstitute.gatk:gatk-engine:3.3" level="project" /> + <orderEntry type="library" name="Maven: org.scala-lang:scala-compiler:2.10.2" level="project" /> + <orderEntry type="library" name="Maven: org.scala-lang:scala-reflect:2.10.2" level="project" /> + <orderEntry type="library" name="Maven: log4j:log4j:1.2.15" level="project" /> + <orderEntry type="library" name="Maven: net.sf.jgrapht:jgrapht:0.8.3" level="project" /> + <orderEntry type="library" name="Maven: org.apache.commons:commons-email:1.2" level="project" /> + <orderEntry type="library" name="Maven: javax.activation:activation:1.1" level="project" /> + <orderEntry type="library" name="Maven: javax.mail:mail:1.4.4" level="project" /> + <orderEntry type="module-library"> + <library name="Maven: com.sun:tools:1.4.2"> + <CLASSES> + <root url="jar:///usr/lib/jvm/java-1.7.0-openjdk-amd64/lib/tools.jar!/" /> + </CLASSES> + <JAVADOC /> + <SOURCES /> + </library> + </orderEntry> + <orderEntry type="library" name="Maven: org.broadinstitute.gatk:gatk-queue-extensions-distribution:3.3" level="project" /> + <orderEntry type="library" name="Maven: org.broadinstitute.gatk:gatk-tools-protected:3.3" level="project" /> + <orderEntry type="library" name="Maven: gov.nist.math:jama:1.0.2" level="project" /> + <orderEntry type="library" scope="RUNTIME" name="Maven: org.broadinstitute.gatk:gatk-queue-extensions-generator:3.3" level="project" /> + <orderEntry type="library" name="Maven: org.broadinstitute.gatk:gatk-utils:3.3" level="project" /> + <orderEntry type="library" name="Maven: samtools:htsjdk:1.120.1620" level="project" /> + <orderEntry type="library" name="Maven: org.xerial.snappy:snappy-java:1.0.3-rc3" level="project" /> + <orderEntry type="library" name="Maven: picard:picard:1.120.1579" level="project" /> + <orderEntry type="library" name="Maven: org.apache.ant:ant:1.8.2" level="project" /> + <orderEntry type="module-library"> + <library name="Maven: com.sun:tools.jar:1.5"> + <CLASSES> + <root url="jar:///usr/lib/jvm/java-1.7.0-openjdk-amd64/lib/tools.jar!/" /> + </CLASSES> + <JAVADOC /> + <SOURCES /> + </library> + </orderEntry> + <orderEntry type="library" name="Maven: colt:colt:1.2.0" level="project" /> + <orderEntry type="library" name="Maven: concurrent:concurrent:1.3.4" level="project" /> + <orderEntry type="library" name="Maven: it.unimi.dsi:fastutil:6.5.3" level="project" /> + <orderEntry type="library" name="Maven: org.simpleframework:simple-xml:2.0.4" level="project" /> + <orderEntry type="library" name="Maven: stax:stax-api:1.0.1" level="project" /> + <orderEntry type="library" name="Maven: stax:stax:1.2.0" level="project" /> + <orderEntry type="library" name="Maven: org.reflections:reflections:0.9.9-RC1" level="project" /> + <orderEntry type="library" name="Maven: org.javassist:javassist:3.16.1-GA" level="project" /> + <orderEntry type="library" name="Maven: dom4j:dom4j:1.6.1" level="project" /> + <orderEntry type="library" name="Maven: xml-apis:xml-apis:1.0.b2" level="project" /> + <orderEntry type="library" name="Maven: org.slf4j:slf4j-log4j12:1.6.1" level="project" /> + <orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.6.1" level="project" /> + <orderEntry type="library" name="Maven: org.freemarker:freemarker:2.3.18" level="project" /> + <orderEntry type="library" name="Maven: org.apache.commons:commons-jexl:2.1.1" level="project" /> + <orderEntry type="library" name="Maven: commons-logging:commons-logging:1.1.1" level="project" /> + <orderEntry type="library" name="Maven: commons-lang:commons-lang:2.5" level="project" /> + <orderEntry type="library" name="Maven: commons-io:commons-io:2.1" level="project" /> + <orderEntry type="library" name="Maven: commons-collections:commons-collections:3.2.1" level="project" /> + <orderEntry type="library" name="Maven: org.apache.commons:commons-math:2.2" level="project" /> + <orderEntry type="library" name="Maven: net.java.dev.jna:jna:3.2.7" level="project" /> + <orderEntry type="library" name="Maven: net.java.dev.jets3t:jets3t:0.8.1" level="project" /> + <orderEntry type="library" name="Maven: commons-codec:commons-codec:1.3" level="project" /> + <orderEntry type="library" name="Maven: commons-httpclient:commons-httpclient:3.1" level="project" /> + <orderEntry type="library" name="Maven: com.jamesmurty.utils:java-xmlbuilder:0.4" level="project" /> + <orderEntry type="library" name="Maven: us.levk:drmaa-gridengine:6.2u5" level="project" /> + <orderEntry type="library" name="Maven: us.levk:drmaa-common:1.0" level="project" /> + <orderEntry type="library" name="Maven: com.google.code.gson:gson:2.2.2" level="project" /> + <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpclient:4.1.1" level="project" /> + <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore:4.1" level="project" /> + <orderEntry type="library" name="Maven: com.google.code.cofoja:cofoja:1.0-r139" level="project" /> + <orderEntry type="library" name="Maven: io.argonaut:argonaut_2.11:6.1-M4" level="project" /> + <orderEntry type="library" name="Maven: org.scalaz:scalaz-core_2.11:7.1.0" level="project" /> + <orderEntry type="library" name="Maven: org.scala-lang.modules:scala-parser-combinators_2.11:1.0.2" level="project" /> + <orderEntry type="library" name="Maven: org.scala-lang.modules:scala-xml_2.11:1.0.2" level="project" /> + <orderEntry type="library" name="Maven: com.github.julien-truffaut:monocle-core_2.11:0.5.0" level="project" /> + <orderEntry type="library" name="Maven: org.biojava:biojava3-core:3.1.0" level="project" /> + <orderEntry type="library" name="Maven: org.biojava:biojava3-sequencing:3.1.0" level="project" /> + <orderEntry type="library" name="Maven: com.google.guava:guava:18.0" level="project" /> + <orderEntry type="library" name="Maven: com.github.scopt:scopt_2.10:3.2.0" level="project" /> + </component> +</module> \ No newline at end of file diff --git a/public/bam2wig/pom.xml b/public/bam2wig/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..6a20ded1b5f9ff98c85f5a6d49a24db6534f670d --- /dev/null +++ b/public/bam2wig/pom.xml @@ -0,0 +1,29 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <groupId>nl.lumc.sasc</groupId> + <artifactId>Bam2Wig</artifactId> + <packaging>jar</packaging> + + <parent> + <groupId>nl.lumc.sasc</groupId> + <artifactId>Biopet</artifactId> + <version>0.3.0-DEV</version> + <relativePath>../</relativePath> + </parent> + + <inceptionYear>2014</inceptionYear> + <name>Bam2Wig</name> + + <dependencies> + <dependency> + <groupId>nl.lumc.sasc</groupId> + <artifactId>BiopetFramework</artifactId> + <version>${project.version}</version> + </dependency> + </dependencies> + +</project> \ No newline at end of file diff --git a/public/bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/Bam2Wig.scala b/public/bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/Bam2Wig.scala new file mode 100644 index 0000000000000000000000000000000000000000..e76acd863a3eed3ebdcb1a4b1b5654fe71d623b9 --- /dev/null +++ b/public/bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/Bam2Wig.scala @@ -0,0 +1,55 @@ +package nl.lumc.sasc.biopet.pipelines.bamtobigwig + +import java.io.File + +import nl.lumc.sasc.biopet.core.config.Configurable +import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } +import nl.lumc.sasc.biopet.extensions.WigToBigWig +import nl.lumc.sasc.biopet.extensions.igvtools.IGVToolsCount +import org.broadinstitute.gatk.queue.QScript +import org.broadinstitute.gatk.utils.commandline.{ Output, Input } + +/** + * Created by pjvan_thof on 1/29/15. + */ +class Bam2Wig(val root: Configurable) extends QScript with BiopetQScript { + def this() = this(null) + + @Input(doc = "Input bam file", required = true) + var bamFile: File = null + + def init(): Unit = { + } + + def biopetScript(): Unit = { + val bs = new BamToChromSizes(this) + bs.bamFile = bamFile + bs.chromSizesFile = bamFile.getAbsoluteFile + ".chrom.sizes" + bs.isIntermediate = true + add(bs) + + val igvCount = new IGVToolsCount(this) + igvCount.input = bamFile + igvCount.genomeChromSizes = bs.chromSizesFile + igvCount.wig = Some(swapExt(outputDir, bamFile, ".bam", ".wig")) + igvCount.tdf = Some(swapExt(outputDir, bamFile, ".bam", ".tdf")) + add(igvCount) + + val wigToBigWig = new WigToBigWig(this) + wigToBigWig.inputWigFile = igvCount.wig.get + wigToBigWig.inputChromSizesFile = bs.chromSizesFile + wigToBigWig.outputBigWig = swapExt(outputDir, bamFile, ".bam", ".bigwig") + add(wigToBigWig) + } +} + +object Bam2Wig extends PipelineCommand { + def apply(root: Configurable, bamFile: File): Bam2Wig = { + val bamToBigWig = new Bam2Wig(root) + bamToBigWig.outputDir = bamFile.getParent + bamToBigWig.bamFile = bamFile + bamToBigWig.init() + bamToBigWig.biopetScript() + bamToBigWig + } +} \ No newline at end of file diff --git a/public/bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/BamToChromSizes.scala b/public/bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/BamToChromSizes.scala new file mode 100644 index 0000000000000000000000000000000000000000..5ed1fae9f34191b7394a8dd09b78432ea8a2bdd9 --- /dev/null +++ b/public/bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/BamToChromSizes.scala @@ -0,0 +1,30 @@ +package nl.lumc.sasc.biopet.pipelines.bamtobigwig + +import java.io.{ PrintWriter, File } + +import htsjdk.samtools.SamReaderFactory +import nl.lumc.sasc.biopet.core.config.Configurable +import org.broadinstitute.gatk.queue.function.InProcessFunction +import org.broadinstitute.gatk.utils.commandline.{ Output, Input } +import scala.collection.JavaConversions._ + +/** + * Created by pjvan_thof on 1/29/15. + */ +class BamToChromSizes(val root: Configurable) extends InProcessFunction with Configurable { + @Input + var bamFile: File = _ + + @Output + var chromSizesFile: File = _ + + def run(): Unit = { + val bamReader = SamReaderFactory.makeDefault().open(bamFile) + val writer = new PrintWriter(chromSizesFile) + for (ref <- bamReader.getFileHeader.getSequenceDictionary.getSequences) { + writer.println(ref.getSequenceName + "\t" + ref.getSequenceLength) + } + bamReader.close() + writer.close + } +} diff --git a/public/biopet-framework/BiopetFramework.iml b/public/biopet-framework/BiopetFramework.iml index d59d5cc4750108940c39fd95bfae08d7e0300888..2650bfa758dfdb73b291111c3964716088402649 100644 --- a/public/biopet-framework/BiopetFramework.iml +++ b/public/biopet-framework/BiopetFramework.iml @@ -1,5 +1,10 @@ <?xml version="1.0" encoding="UTF-8"?> <module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4"> + <component name="FacetManager"> + <facet type="Python" name="Python"> + <configuration sdkName="" /> + </facet> + </component> <component name="NewModuleRootManager" inherit-compiler-output="false"> <output url="file://$MODULE_DIR$/target/classes" /> <output-test url="file://$MODULE_DIR$/target/test-classes" /> diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala index 83aa87155236e9b2752407fb4bd39ce21dc36824..3ed9ba5bf58e0bc4a62d7c4f9783e4fb924a2838 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala @@ -51,7 +51,8 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab override def freezeFieldValues() { checkExecutable afterGraph - if (jobOutputFile == null) jobOutputFile = new File(firstOutput.getParent + "/." + firstOutput.getName + "." + configName + ".out") + + if (jobOutputFile == null) jobOutputFile = new File(firstOutput.getAbsoluteFile.getParent + "/." + firstOutput.getName + "." + configName + ".out") if (threads == 0) threads = getThreads(defaultThreads) if (threads > 1) nCoresRequest = Option(threads) diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Ln.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Ln.scala index 394ad5cdda9d6768d5bd9f72e7019370168f7276..0d1d53ef4f69e8ecdb4bdf73e8156e85a47bd17b 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Ln.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Ln.scala @@ -30,6 +30,9 @@ class Ln(val root: Configurable) extends InProcessFunction with Configurable { @Output(doc = "Link destination") var out: File = _ + @Output + var deps: List[File] = Nil + var relative: Boolean = true lazy val cmd: String = { diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/WigToBigWig.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/WigToBigWig.scala index 531425b7a50bde7a77d62213561cef91095ecdd9..9c98893c9a4f22216cbcc57041bf7ca65b50be7e 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/WigToBigWig.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/WigToBigWig.scala @@ -14,10 +14,10 @@ class WigToBigWig(val root: Configurable) extends BiopetCommandLineFunction { @Input(doc = "Input wig file") var inputWigFile: File = _ - @Input(doc = "Input chrom sizes file") + @Input(doc = "Input chrom sizes file", required = true) var inputChromSizesFile: File = _ - @Output(doc = "Output BigWig file") + @Output(doc = "Output BigWig file", required = true) var outputBigWig: File = _ executable = config("exe", default = "wigToBigWig") diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVTools.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVTools.scala index d017864f6988828100f6bbda421fbf734a9a5878..aff6ee16cdfbf1a77529d079bc1789c9ffcc494b 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVTools.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVTools.scala @@ -9,6 +9,6 @@ import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction abstract class IGVTools extends BiopetCommandLineFunction { executable = config("exe", default = "igvtools", submodule = "igvtools", freeVar = false) override def versionCommand = executable + " version" - override val versionRegex = """IGV Version: ([\d\.]) .*""".r + override val versionRegex = """IGV Version:? ([\w\.]*) .*""".r override val versionExitcode = List(0) } \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVToolsCount.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVToolsCount.scala index 8037616834ecd4de02e9949883b75d20b45c7347..dc6d21e6d4da53d42513ca8c7ceb9fd67fdc8570 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVToolsCount.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVToolsCount.scala @@ -1,8 +1,6 @@ package nl.lumc.sasc.biopet.extensions.igvtools -import java.nio.file.InvalidPathException - import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } import java.io.{ FileNotFoundException, File } @@ -13,19 +11,18 @@ import java.io.{ FileNotFoundException, File } * @constructor create a new IGVTools instance from a `.bam` file * */ - class IGVToolsCount(val root: Configurable) extends IGVTools { @Input(doc = "Bam File") var input: File = _ - @Input(doc = "<genome>.chrom.sizes File") + @Input(doc = "<genome>.chrom.sizes File", required = true) var genomeChromSizes: File = _ @Output - var tdf: Option[File] = _ + var tdf: Option[File] = None @Output - var wig: Option[File] = _ + var wig: Option[File] = None var maxZoom: Option[Int] = config("maxZoom") var windowSize: Option[Int] = config("windowSize") @@ -46,10 +43,11 @@ class IGVToolsCount(val root: Configurable) extends IGVTools { override def afterGraph { super.afterGraph - if (!input.exists()) throw new FileNotFoundException("Input bam is required for IGVToolsCount") - if (!wig.isEmpty && !wig.get.getAbsolutePath.endsWith(".wig")) throw new IllegalArgumentException("Wiggle file should have a .wig file-extension") - if (!tdf.isEmpty && !tdf.get.getAbsolutePath.endsWith(".tdf")) throw new IllegalArgumentException("TDF file should have a .tdf file-extension") + wig.foreach(x => if (!x.getAbsolutePath.endsWith(".wig")) + throw new IllegalArgumentException("WIG file should have a .wig file-extension")) + tdf.foreach(x => if (!x.getAbsolutePath.endsWith(".tdf")) + throw new IllegalArgumentException("TDF file should have a .tdf file-extension")) } def cmdLine = { diff --git a/public/biopet-public-package/BiopetPublicPackage.iml b/public/biopet-public-package/BiopetPublicPackage.iml index 345cfb0160f3ab2b64c422138803eea44c9b1db6..4a56bfdd895fae9aa229e505ffb220624537528b 100644 --- a/public/biopet-public-package/BiopetPublicPackage.iml +++ b/public/biopet-public-package/BiopetPublicPackage.iml @@ -94,5 +94,6 @@ <orderEntry type="module" module-name="Gentrap" /> <orderEntry type="module" module-name="Sage" /> <orderEntry type="module" module-name="Yamsvp" /> + <orderEntry type="module" module-name="Bam2Wig" /> </component> </module> \ No newline at end of file diff --git a/public/biopet-public-package/pom.xml b/public/biopet-public-package/pom.xml index f400f8f7c05370b360b1a60d7ec212562f3a5170..0ebd7278059952eca93e42862ba22be632706c6b 100644 --- a/public/biopet-public-package/pom.xml +++ b/public/biopet-public-package/pom.xml @@ -80,6 +80,11 @@ <artifactId>Kopisu</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>nl.lumc.sasc</groupId> + <artifactId>Bam2Wig</artifactId> + <version>${project.version}</version> + </dependency> <dependency> <groupId>nl.lumc.sasc</groupId> <artifactId>Carp</artifactId> diff --git a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutablePublic.scala b/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutablePublic.scala index 070b1c756b7677541b5835dd272bc1216758e7be..279e33e4ddac28f317b9da98bcda0b9b2b647877 100644 --- a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutablePublic.scala +++ b/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutablePublic.scala @@ -23,6 +23,7 @@ object BiopetExecutablePublic extends BiopetExecutable { nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics, nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp, nl.lumc.sasc.biopet.pipelines.sage.Sage, + nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig, nl.lumc.sasc.biopet.pipelines.kopisu.ConiferPipeline, nl.lumc.sasc.biopet.pipelines.carp.Carp ) diff --git a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala b/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala index 578f3afc79f6cac8d2621399bb577222383ee2bb..763b7b9c616e5b301735e6ef387a30df6f510526 100644 --- a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala +++ b/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala @@ -20,6 +20,7 @@ import java.io.File import nl.lumc.sasc.biopet.extensions.Ln import nl.lumc.sasc.biopet.extensions.macs2.Macs2CallPeak import nl.lumc.sasc.biopet.extensions.picard.MergeSamFiles +import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig import nl.lumc.sasc.biopet.utils.ConfigUtils import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.utils.commandline.{ Argument, Input } @@ -80,10 +81,10 @@ class Carp(val root: Configurable) extends QScript with MultiSampleQScript { merge.sortOrder = "coordinate" merge.output = bamFile add(merge) - - //TODO: Add BigWIg track } + addAll(Bam2Wig(qscript, bamFile).functions) + val macs2 = new Macs2CallPeak(qscript) macs2.treatment = bamFile macs2.name = Some(sampleId) diff --git a/public/gentrap/Gentrap.iml b/public/gentrap/Gentrap.iml index 6a01a31efae98b20b779595740810834e2906ddf..ce32544041bc11201b6adb03a31709dc78703bff 100644 --- a/public/gentrap/Gentrap.iml +++ b/public/gentrap/Gentrap.iml @@ -91,5 +91,6 @@ <orderEntry type="module" module-name="Mapping" /> <orderEntry type="module" module-name="Flexiprep" /> <orderEntry type="module" module-name="BamMetrics" /> + <orderEntry type="module" module-name="Bam2Wig" /> </component> </module> \ No newline at end of file diff --git a/public/mapping/Mapping.iml b/public/mapping/Mapping.iml index 3c5b2a63e2c9e3a463b31b233962a829d764566e..ad454f7eaf2f6072be40ed6cb50f81cb484b49d2 100644 --- a/public/mapping/Mapping.iml +++ b/public/mapping/Mapping.iml @@ -90,5 +90,6 @@ <orderEntry type="library" name="Maven: com.github.scopt:scopt_2.10:3.2.0" level="project" /> <orderEntry type="module" module-name="Flexiprep" /> <orderEntry type="module" module-name="BamMetrics" /> + <orderEntry type="module" module-name="Bam2Wig" /> </component> </module> \ No newline at end of file diff --git a/public/mapping/pom.xml b/public/mapping/pom.xml index 345678a7a642580cb2a733d6c90239633efdea5b..ef2604bb240a7863896ecdc1fb4a49a138c748c7 100644 --- a/public/mapping/pom.xml +++ b/public/mapping/pom.xml @@ -49,5 +49,10 @@ <artifactId>BamMetrics</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>nl.lumc.sasc</groupId> + <artifactId>Bam2Wig</artifactId> + <version>${project.version}</version> + </dependency> </dependencies> </project> diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala index 9094d38cddbe70d8c7b6787200d2afe94e19d486..57a64f51fac807a83c6da86c99466b2e2f43d057 100644 --- a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala +++ b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala @@ -21,6 +21,7 @@ import java.util.Date import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } import nl.lumc.sasc.biopet.extensions.{ Ln, Star, Stampy, Bowtie } import nl.lumc.sasc.biopet.extensions.bwa.{ BwaSamse, BwaSampe, BwaAln, BwaMem } +import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig import nl.lumc.sasc.biopet.tools.FastqSplitter import nl.lumc.sasc.biopet.extensions.picard.{ MarkDuplicates, SortSam, MergeSamFiles, AddOrReplaceReadGroups } import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics @@ -219,6 +220,9 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { add(Ln(this, swapExt(outputDir, bamFile, ".bam", ".bai"), swapExt(outputDir, finalBamFile, ".bam", ".bai"))) add(Ln(this, bamFile, finalBamFile)) outputFiles += ("finalBamFile" -> bamFile) + + if (config("generate_wig", default = false).asBoolean) + addAll(Bam2Wig(this, finalBamFile).functions) } def addBwaAln(R1: File, R2: File, output: File, deps: List[File]): File = { diff --git a/public/pom.xml b/public/pom.xml index d2e96d852d26f2be989aa35fef81773620b48221..b9043502c766d917ac5bf6fbe665dba46b3df7fb 100644 --- a/public/pom.xml +++ b/public/pom.xml @@ -34,6 +34,7 @@ <module>sage</module> <module>kopisu</module> <module>yamsvp</module> + <module>bam2wig</module> <module>carp</module> </modules> diff --git a/public/sage/Sage.iml b/public/sage/Sage.iml index 6a01a31efae98b20b779595740810834e2906ddf..ce32544041bc11201b6adb03a31709dc78703bff 100644 --- a/public/sage/Sage.iml +++ b/public/sage/Sage.iml @@ -91,5 +91,6 @@ <orderEntry type="module" module-name="Mapping" /> <orderEntry type="module" module-name="Flexiprep" /> <orderEntry type="module" module-name="BamMetrics" /> + <orderEntry type="module" module-name="Bam2Wig" /> </component> </module> \ No newline at end of file diff --git a/public/yamsvp/Yamsvp.iml b/public/yamsvp/Yamsvp.iml index 6a01a31efae98b20b779595740810834e2906ddf..ce32544041bc11201b6adb03a31709dc78703bff 100644 --- a/public/yamsvp/Yamsvp.iml +++ b/public/yamsvp/Yamsvp.iml @@ -91,5 +91,6 @@ <orderEntry type="module" module-name="Mapping" /> <orderEntry type="module" module-name="Flexiprep" /> <orderEntry type="module" module-name="BamMetrics" /> + <orderEntry type="module" module-name="Bam2Wig" /> </component> </module> \ No newline at end of file