From b35535282b0396962e483dd7572a8c9a69d70e35 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Mon, 26 May 2014 19:25:18 +0200 Subject: [PATCH] Gatk now uses the mapping module --- gatk/examples/test.json | 28 +++++++ gatk/nbactions.xml | 6 +- gatk/pom.xml | 2 +- .../sasc/biopet/pipelines/gatk/Gatk.scala | 83 +++++++------------ .../biopet/pipelines/mapping/Mapping.scala | 6 +- 5 files changed, 65 insertions(+), 60 deletions(-) create mode 100644 gatk/examples/test.json diff --git a/gatk/examples/test.json b/gatk/examples/test.json new file mode 100644 index 000000000..a7a018eb3 --- /dev/null +++ b/gatk/examples/test.json @@ -0,0 +1,28 @@ +{ + "fastqc": { "exe": "/home/pjvan_thof/Downloads/FastQC/fastqc" }, + "bwa" : {"exe":"/home/pjvan_thof/pipelines/test/test"}, + "flexiprep": { + "fastqc": {"exe":"/home/pjvan_thof/pipelines/test/test"}, + "cutadapt": {"exe":"/home/pjvan_thof/pipelines/test/test"}, + "sickle": {"exe":"/home/pjvan_thof/pipelines/test/test"} + }, + "gatk": { + "referenceFile" : "/home/pjvan_thof/pipelines/test/test", + "dbsnp": "/home/pjvan_thof/pipelines/test/test", + "hapmap": "/home/pjvan_thof/pipelines/test/test", + "omni": "/home/pjvan_thof/pipelines/test/test", + "1000G": "/home/pjvan_thof/pipelines/test/test", + "mills": "/home/pjvan_thof/pipelines/test/test" + }, + "Samples": { + "test": { + "ID": "test", + "Runs": { + "1" : { + "ID": "1", + "R1" : "/home/pjvan_thof/pipelines/test/test.fastq" + } + } + } + } +} diff --git a/gatk/nbactions.xml b/gatk/nbactions.xml index 531ad188f..fa72b2b19 100644 --- a/gatk/nbactions.xml +++ b/gatk/nbactions.xml @@ -10,7 +10,7 @@ <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal> </goals> <properties> - <exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.gatk.Gatk</exec.args> + <exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.gatk.Gatk -config /home/pjvan_thof/pipelines/biopet/gatk/examples/test.json -outputDir /home/pjvan_thof/pipelines/test -l 
debug</exec.args> <exec.executable>java</exec.executable> <exec.workingdir>/home/pjvan_thof/pipelines/test</exec.workingdir> </properties> @@ -25,7 +25,7 @@ <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal> </goals> <properties> - <exec.args>-Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath nl.lumc.sasc.biopet.pipelines.gatk.Gatk</exec.args> + <exec.args>-Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath nl.lumc.sasc.biopet.pipelines.gatk.Gatk -config /home/pjvan_thof/pipelines/biopet/gatk/examples/test.json -outputDir /home/pjvan_thof/pipelines/test -l debug</exec.args> <exec.executable>java</exec.executable> <jpda.listen>true</jpda.listen> <exec.workingdir>/home/pjvan_thof/pipelines/test</exec.workingdir> @@ -41,7 +41,7 @@ <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal> </goals> <properties> - <exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.gatk.Gatk</exec.args> + <exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.gatk.Gatk -config /home/pjvan_thof/pipelines/biopet/gatk/examples/test.json -outputDir /home/pjvan_thof/pipelines/test -l debug</exec.args> <exec.executable>java</exec.executable> <exec.workingdir>/home/pjvan_thof/pipelines/test</exec.workingdir> </properties> diff --git a/gatk/pom.xml b/gatk/pom.xml index 133eb8070..5c1f239b0 100644 --- a/gatk/pom.xml +++ b/gatk/pom.xml @@ -49,7 +49,7 @@ </dependency> <dependency> <groupId>nl.lumc.sasc</groupId> - <artifactId>Flexiprep</artifactId> + <artifactId>Mapping</artifactId> <version>0.1.0</version> </dependency> <dependency> diff --git a/gatk/src/main/java/nl/lumc/sasc/biopet/pipelines/gatk/Gatk.scala b/gatk/src/main/java/nl/lumc/sasc/biopet/pipelines/gatk/Gatk.scala index 1d0b49b6e..daabf90b9 100644 --- a/gatk/src/main/java/nl/lumc/sasc/biopet/pipelines/gatk/Gatk.scala +++ b/gatk/src/main/java/nl/lumc/sasc/biopet/pipelines/gatk/Gatk.scala @@ -1,7 +1,9 @@ package 
nl.lumc.sasc.biopet.pipelines.gatk import nl.lumc.sasc.biopet.wrappers._ +import nl.lumc.sasc.biopet.wrappers.aligners._ import nl.lumc.sasc.biopet.core._ +import nl.lumc.sasc.biopet.pipelines.mapping._ import nl.lumc.sasc.biopet.pipelines.flexiprep._ import org.broadinstitute.sting.queue.QScript import org.broadinstitute.sting.queue.extensions.gatk._ @@ -120,8 +122,6 @@ class Gatk(private var globalConfig: Config) extends QScript { } add(indelApplyRecalibration) } else logger.warn("No gVCFs to genotype") - - } } @@ -179,67 +179,40 @@ class Gatk(private var globalConfig: Config) extends QScript { if (fastq_R1 != null) { val runDir: String = outputDir + sampleID + "/run_" + runID + "/" - val flexiprep = new Flexiprep(config) - flexiprep.input_R1 = fastq_R1 - if (paired) flexiprep.input_R2 = fastq_R2 - flexiprep.outputDir = runDir + "flexiprep/" - flexiprep.script - addAll(flexiprep.functions) // Add functions of flexiprep to curent function pool - - val bwaCommand = new Bwa(config) - bwaCommand.R1 = flexiprep.outputFiles("output_R1") - if (paired) bwaCommand.R2 = flexiprep.outputFiles("output_R2") - //bwaCommand.referenceFile = qscript.referenceFile - //bwaCommand.nCoresRequest = 8 - bwaCommand.jobResourceRequests :+= "h_vmem=6G" - bwaCommand.RG = "@RG\\t" + - "ID:" + sampleID + "_" + runID + "\\t" + - "LB:" + sampleID + "_" + runID + "\\t" + - "PL:illumina\\t" + - "CN:SASC\\t" + - "SM:" + sampleID + "\\t" + - "PU:na" - bwaCommand.output = new File(runDir + sampleID + "-run_" + runID + ".sam") - add(bwaCommand) + val mapping = new Mapping(config) + mapping.input_R1 = fastq_R1 + if (paired) mapping.input_R2 = fastq_R2 + mapping.outputDir = runDir + "mapping/" + mapping.RGSM = sampleID + mapping.RGLB = runID + if (runConfig.contains("PL")) mapping.RGPL = runConfig.getAsString("PL") + if (runConfig.contains("PU")) mapping.RGPU = runConfig.getAsString("PU") + if (runConfig.contains("CN")) mapping.RGCN = runConfig.getAsString("CN") + mapping.script + 
addAll(mapping.functions) // Add functions of mapping to current function pool - var bamFile:File = addSortSam(List(bwaCommand.output), swapExt(runDir,bwaCommand.output,".sam",".bam"), runDir) - bamFile = addMarkDuplicates(List(bamFile), swapExt(runDir,bamFile,".bam",".dedup.bam"), runDir) - bamFile = addIndelRealign(bamFile,runDir) // Indel realigner + var bamFile:File = addIndelRealign(mapping.outputFiles("finalBamFile"),runDir) // Indel realigner bamFile = addBaseRecalibrator(bamFile,runDir) // Base recalibrator outputFiles += ("FinalBam" -> bamFile) } else this.logger.error("Sample: " + sampleID + ": No R1 found for runs: " + runConfig) return outputFiles } - - def addSortSam(inputSam:List[File], outputFile:File, dir:String) : File = { - val sortSam = new SortSam { - this.input = inputSam - this.createIndex = true - this.output = outputFile - this.memoryLimit = 2 - this.nCoresRequest = 2 - this.jobResourceRequests :+= "h_vmem=4G" - } - add(sortSam) - - return sortSam.output - } - - def addMarkDuplicates(inputBams:List[File], outputFile:File, dir:String) : File = { - val markDuplicates = new MarkDuplicates { - this.input = inputBams - this.output = outputFile - this.REMOVE_DUPLICATES = false - this.metrics = swapExt(dir,outputFile,".bam",".metrics") - this.outputIndex = swapExt(dir,this.output,".bam",".bai") - this.memoryLimit = 2 - this.jobResourceRequests :+= "h_vmem=4G" - } - add(markDuplicates) - return markDuplicates.output - } +// def addMarkDuplicates(inputBams:List[File], outputFile:File, dir:String) : File = { +// val markDuplicates = new MarkDuplicates { +// this.input = inputBams +// this.output = outputFile +// this.REMOVE_DUPLICATES = false +// this.metrics = swapExt(dir,outputFile,".bam",".metrics") +// this.outputIndex = swapExt(dir,this.output,".bam",".bai") +// this.memoryLimit = 2 +// this.jobResourceRequests :+= "h_vmem=4G" +// } +// add(markDuplicates) +// +// return markDuplicates.output +// } def addIndelRealign(inputBam:File, dir:String): 
File = { val realignerTargetCreator = new RealignerTargetCreator with gatkArguments { diff --git a/mapping/src/main/java/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala b/mapping/src/main/java/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala index 73b181452..5a1b5960d 100644 --- a/mapping/src/main/java/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala +++ b/mapping/src/main/java/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala @@ -1,6 +1,7 @@ package nl.lumc.sasc.biopet.pipelines.mapping import nl.lumc.sasc.biopet.wrappers._ +import nl.lumc.sasc.biopet.wrappers.aligners._ import java.util.Date import nl.lumc.sasc.biopet.core._ import nl.lumc.sasc.biopet.pipelines.flexiprep._ @@ -19,7 +20,7 @@ class Mapping(private var globalConfig: Config) extends QScript { @Input(doc="R1 fastq file", shortName="R1",required=true) var input_R1: File = _ @Input(doc="R2 fastq file", shortName="R2", required=false) var input_R2: File = _ @Argument(doc="Output directory", shortName="outputDir", required=true) var outputDir: String = _ - @Argument(doc="Output name", shortName="outputName", required=true) var outputName: String = _ + @Argument(doc="Output name", shortName="outputName", required=false) var outputName: String = _ @Argument(doc="Skip flexiprep", shortName="skipflexiprep", required=false) var skipFlexiprep: Boolean = false @Argument(doc="Skip mark duplicates", shortName="skipmarkduplicates", required=false) var skipMarkduplicates: Boolean = false @Argument(doc="Alginer", shortName="ALN", required=false) var aligner: String = _ @@ -64,6 +65,8 @@ class Mapping(private var globalConfig: Config) extends QScript { if (RGPU == null) RGPU = config.getAsString("RGPU", "na") if (RGCN == null && config.contains("RGCN")) RGCN = config.getAsString("RGCN") if (RGDS == null && config.contains("RGDS")) RGDS = config.getAsString("RGDS") + + if (outputName == null) outputName = RGID } def script() { @@ -94,6 +97,7 @@ class Mapping(private var globalConfig: Config) extends QScript 
{ } if (!skipMarkduplicates) bamFile = addMarkDuplicates(List(bamFile), swapExt(outputDir,bamFile,".bam",".dedup.bam"), outputDir) + outputFiles += ("finalBamFile" -> bamFile) } def addSortSam(inputSam:List[File], outputFile:File, dir:String) : File = { -- GitLab