From 171522f4770ecf59321f4ab7864654639b973956 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Tue, 3 Jun 2014 11:47:16 +0200 Subject: [PATCH] Changed filenames for compatible summarize script --- .../pipelines/flexiprep/Flexiprep.scala | 122 ++++++++++++++---- 1 file changed, 97 insertions(+), 25 deletions(-) diff --git a/flexiprep/src/main/java/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala b/flexiprep/src/main/java/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala index 9ba56ea64..554101a2d 100644 --- a/flexiprep/src/main/java/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala +++ b/flexiprep/src/main/java/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala @@ -3,7 +3,6 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep import nl.lumc.sasc.biopet.core._ import nl.lumc.sasc.biopet.wrappers._ import org.broadinstitute.sting.queue.QScript -import org.broadinstitute.sting.queue.engine.JobRunInfo import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.queue.extensions.picard._ import org.broadinstitute.sting.queue.function._ @@ -21,8 +20,11 @@ class Flexiprep(private var globalConfig: Config) extends QScript with BiopetQSc @Argument(doc="Skip Trim fastq files", shortName="skiptrim", required=false) var skipTrim: Boolean = false @Argument(doc="Skip Clip fastq files", shortName="skipclip", required=false) var skipClip: Boolean = false - //var outputFiles:Map[String,File] = Map() var paired: Boolean = (input_R2 != null) + var R1_ext: String = _ + var R2_ext: String = _ + var R1_name: String = _ + var R2_name: String = _ def init() { for (file <- configfiles) globalConfig.loadConfigFile(file) @@ -33,6 +35,20 @@ class Flexiprep(private var globalConfig: Config) extends QScript with BiopetQSc if (outputDir == null) throw new IllegalStateException("Missing Output directory on flexiprep module") else if (!outputDir.endsWith("/")) outputDir += "/" paired = (input_R2 != null) + + if (input_R1.endsWith(".gz")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gz")) + else if (input_R1.endsWith(".gzip")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gzip")) + else R1_name = input_R1 + R1_ext = R1_name.substring(R1_name.lastIndexOf("."), R1_name.size) + R1_name = R1_name.substring(0, R1_name.lastIndexOf(R1_ext)) + + if (paired) { + if (input_R2.endsWith(".gz")) R2_name = input_R2.getName.substring(0, input_R2.getName.lastIndexOf(".gz")) + else if (input_R2.endsWith(".gzip")) R2_name = input_R2.getName.substring(0, input_R2.getName.lastIndexOf(".gzip")) + else R2_name = input_R2 + R2_ext = R2_name.substring(R2_name.lastIndexOf("."), R2_name.size) + R2_name = R2_name.substring(0, R2_name.lastIndexOf(R2_ext)) + } } def script() { @@ -42,6 +58,12 @@ class Flexiprep(private var globalConfig: Config) extends QScript with BiopetQSc outputFiles += ("output_R1" -> zcatIfNeeded(input_R1,outputDir)) if (paired) outputFiles += ("output_R2" -> zcatIfNeeded(input_R2,outputDir)) + addSeqstat(outputFiles("output_R1"), "seqstat_R1") + if (paired) addSeqstat(outputFiles("output_R2"), "seqstat_qc_R2") + + addSha1sum(outputFiles("output_R1"), "sha1_R1") + if (paired) addSha1sum(outputFiles("output_R2"), "sha1_R2") + var results: Map[String,File] = Map() if (paired) { results = runTrimClip(outputFiles("output_R1"), outputFiles("output_R2"), outputDir) @@ -51,36 +73,36 @@ class Flexiprep(private var globalConfig: Config) extends QScript with BiopetQSc results = runTrimClip(outputFiles("output_R1"), outputDir) outputFiles += ("output_R1" -> results("output_R1")) } - runFinalFastqc() + runFinalize(List(outputFiles("output_R1")), List(outputFiles("output_R2"))) } def runInitialFastqc() { - var fastqc_R1 = runFastqc(input_R1,outputDir + "/fastqc_R1/") + var fastqc_R1 = runFastqc(input_R1,outputDir + "/" + R1_name + ".fastqc/") outputFiles += ("fastqc_R1" -> fastqc_R1.output) - outputFiles += ("qualtype_R1" -> getQualtype(fastqc_R1)) - outputFiles += ("contams_R1" -> getContams(fastqc_R1)) + outputFiles += ("qualtype_R1" -> getQualtype(fastqc_R1, R1_name)) + outputFiles += ("contams_R1" -> getContams(fastqc_R1, R1_name)) if (paired) { - var fastqc_R2 = runFastqc(input_R2,outputDir + "/fastqc_R2/") + var fastqc_R2 = runFastqc(input_R2,outputDir + "/" + R2_name + ".fastqc/") outputFiles += ("fastqc_R2" -> fastqc_R2.output) - outputFiles += ("qualtype_R2" -> getQualtype(fastqc_R2)) - outputFiles += ("contams_R2" -> getContams(fastqc_R2)) + outputFiles += ("qualtype_R2" -> getQualtype(fastqc_R2, R2_name)) + outputFiles += ("contams_R2" -> getContams(fastqc_R2, R2_name)) } } - def getQualtype(fastqc:Fastqc): File = { + def getQualtype(fastqc:Fastqc, pairname:String): File = { val fastqcToQualtype = new FastqcToQualtype(config) { this.fastqc_output = fastqc.output - this.out = swapExt(outputDir, fastqc.fastqfile, "", ".qualtype.txt") + this.out = new File(outputDir + pairname + ".qualtype.txt") } add(fastqcToQualtype) return fastqcToQualtype.out } - def getContams(fastqc:Fastqc): File = { + def getContams(fastqc:Fastqc, pairname:String): File = { val fastqcToContams = new FastqcToContams(config) { this.fastqc_output = fastqc.output - this.out = swapExt(outputDir, fastqc.fastqfile, "", ".contams.txt") + this.out = new File(outputDir + pairname + ".contams.txt") this.contams_file = fastqc.contaminants } add(fastqcToContams) @@ -92,12 +114,9 @@ class Flexiprep(private var globalConfig: Config) extends QScript with BiopetQSc } def runTrimClip(R1_in:File, R2_in:File, outDir:String) : Map[String,File] = { var results: Map[String,File] = Map() - + var R1: File = new File(R1_in) var R2: File = new File(R2_in) - var R1_ext: String = R1.getName().substring(R1.getName().lastIndexOf("."), R1.getName().size) - var R2_ext: String = "" - if (paired) R2_ext = R2.getName().substring(R2.getName().lastIndexOf("."), R2.getName().size) if (!skipClip) { // Adapter clipping val cutadapt_R1 = new Cutadapt(config) { @@ -119,9 +138,9 @@ class Flexiprep(private var globalConfig: Config) extends QScript with BiopetQSc this.input_start_fastq = cutadapt_R1.fastq_input this.input_R1 = cutadapt_R1.fastq_output this.input_R2 = cutadapt_R2.fastq_output - this.output_R1 = swapExt(outDir, R1, ".clip"+R1_ext, ".clipsync"+R1_ext) - this.output_R2 = swapExt(outDir, R2, ".clip"+R2_ext, ".clipsync"+R2_ext) - this.output_stats = swapExt(outDir, R1, ".clip"+R1_ext, ".clipsync.stats") + this.output_R1 = swapExt(outDir, R1, R1_ext, ".sync"+R1_ext) + this.output_R2 = swapExt(outDir, R2, R2_ext, ".sync"+R2_ext) + this.output_stats = swapExt(outDir, R1, R1_ext, ".sync.stats") } add(fastqSync) R1 = fastqSync.output_R1 @@ -151,11 +170,52 @@ class Flexiprep(private var globalConfig: Config) extends QScript with BiopetQSc return results } - def runFinalFastqc() { + def runFinalize(fastq_R1:List[File], fastq_R2:List[File]) { + if (fastq_R1.length != fastq_R2.length) throw new IllegalStateException("R1 and R2 file number is not the same") + var R1: File = "" + var R2: File = "" + if (fastq_R1.length == 1) { + for (file <- fastq_R1) R1 = file + for (file <- fastq_R2) R2 = file + } else { + throw new IllegalStateException("Not yet inplemented") // For chuncking + } + + if (!config.getAsBoolean("skip_native_link", false)) { + val lnR1 = new Ln(config) { this.in = R1 } + R1 = new File(outputDir + R1_name + ".qc" + R1_ext) + lnR1.out = R1 + add(lnR1) + if (paired) { + val lnR2 = new Ln(config) { this.in = R2 } + R2 = new File(outputDir + R2_name + ".qc" + R2_ext) + lnR2.out = R2 + add(lnR2) + } + } + + addSeqstat(R1, "seqstat_qc_R1") + if (paired) addSeqstat(R2, "seqstat_qc_R2") + + addSha1sum(R1, "sha1_qc_R1") + if (paired) addSha1sum(R2, "sha1_qc_R2") + if (!skipTrim || !skipClip) { - outputFiles += ("fastqc_R1_final" -> runFastqc(outputFiles("output_R1"),outputDir + "/fastqc_qc_R1/").output) - if (paired) outputFiles += ("fastqc_R2_final" -> runFastqc(outputFiles("output_R2"),outputDir + "/fastqc_qc_R2/").output) + outputFiles += ("fastqc_R1_final" -> runFastqc(outputFiles("output_R1"),outputDir + "/" + R1_name + ".qc.fastqc/").output) + if (paired) outputFiles += ("fastqc_R2_final" -> runFastqc(outputFiles("output_R2"),outputDir + "/" + R2_name + ".qc.fastqc/").output) + } + + val summarize = new Summarize(config) { + this.runDir = outputDir + this.samplea = R1_name + if (paired) this.sampleb = R2_name + this.samplename = R1_name + this.clip = !skipClip + this.trim = !skipTrim + this.out = new File(outputDir + R1_name + ".summary.json") } + for ((k,v) <- outputFiles) summarize.deps +:= v + add(summarize) } def runFastqc(fastqfile:File, outDir:String) : Fastqc = { @@ -183,8 +243,20 @@ class Flexiprep(private var globalConfig: Config) extends QScript with BiopetQSc } else return file } - override def onExecutionDone(jobs: Map[QFunction, JobRunInfo], success: Boolean) { - logger.info("Flexiprep is done") + def addSeqstat(fastq:File, key:String) { + val ext = fastq.getName.substring(fastq.getName.lastIndexOf(".")) + val seqstat = new Seqstat(config) { this.input_fastq = fastq } + seqstat.out = swapExt(outputDir, fastq, ext, ".seqstats.json") + add(seqstat) + outputFiles += (key -> seqstat.out) + } + + def addSha1sum(fastq:File, key:String) { + val ext = fastq.getName.substring(fastq.getName.lastIndexOf(".")) + val sha1sum = new Sha1sum(config) { this.in = fastq } + sha1sum.out = swapExt(outputDir, fastq, ext, ".sha1") + add(sha1sum) + outputFiles += (key -> sha1sum.out) } } -- GitLab