diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala index 876da87289b56d183d070293468dcff9bc493931..1d22bb55b9ea174c2c640b1585dc01015116bb81 100644 --- a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala +++ b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala @@ -20,7 +20,7 @@ import java.io.File import java.util.Date import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.core.{ SampleLibraryTag, BiopetQScript, PipelineCommand } -import nl.lumc.sasc.biopet.extensions.{ Ln, Star, Stampy, Bowtie } +import nl.lumc.sasc.biopet.extensions._ import nl.lumc.sasc.biopet.extensions.bwa.{ BwaSamse, BwaSampe, BwaAln, BwaMem } import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig import nl.lumc.sasc.biopet.tools.FastqSplitter @@ -163,9 +163,14 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S chunks += (chunkDir -> (removeGz(chunkDir + input_R1.getName), if (paired) removeGz(chunkDir + input_R2.get.getName) else "")) } - else chunks += (outputDir -> ( - flexiprep.extractIfNeeded(input_R1, flexiprep.outputDir), - if (paired) flexiprep.extractIfNeeded(input_R2.get, flexiprep.outputDir) else "") + else if (skipFlexiprep) { + chunks += (outputDir -> ( + extractIfNeeded(input_R1, flexiprep.outputDir), + if (paired) extractIfNeeded(input_R2.get, outputDir) else "") + ) + } else chunks += (outputDir -> ( + flexiprep.outputFiles("fastq_input_R1"), + if (paired) flexiprep.outputFiles("fastq_input_R2") else "") ) if (chunking) { @@ -441,6 +446,32 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S return RG.substring(0, RG.lastIndexOf("\\t")) } + + //FIXME: This is code duplication from flexiprep, need general class to pass jobs inside a util function + /** + * Extracts file if file is compressed + * 
/**
 * Returns the uncompressed counterpart of `file`, adding a decompression job when one is needed.
 *
 * A name ending in `.gz` or `.gzip` gets a Zcat job and a name ending in `.bz2` gets a Pbzip2
 * job. In both cases the job is marked intermediate and added to this script, and the target
 * file computed by `swapExt` (compression suffix removed, placed relative to `runDir`) is
 * returned. Any other input, including `null`, is returned unchanged and no job is added.
 *
 * FIXME: duplicated from flexiprep; a shared utility that can add jobs would remove the copy.
 *
 * @param file   input file, possibly compressed; `null` is passed straight through
 * @param runDir directory handed to `swapExt` for the extracted file
 * @return the extracted target for compressed input, otherwise `file` itself
 */
def extractIfNeeded(file: File, runDir: File): File = {
  // A null file has no name to inspect; the empty name matches no suffix, so null falls
  // through to the last case and is returned as-is (same contract as before).
  val name = if (file == null) "" else file.getName

  // ".gz" and ".gzip" cannot both be a suffix of the same name, so the lookup order is
  // irrelevant; finding the matching suffix once avoids computing the target file twice.
  Seq(".gz", ".gzip").find(ext => name.endsWith(ext)) match {
    case Some(ext) =>
      val extracted = swapExt(runDir, file, ext, "")
      val zcatCommand = Zcat(this, file, extracted)
      zcatCommand.isIntermediate = true
      add(zcatCommand)
      extracted
    case None if name.endsWith(".bz2") =>
      val extracted = swapExt(runDir, file, ".bz2", "")
      val pbzip2 = Pbzip2(this, file, extracted)
      pbzip2.isIntermediate = true
      add(pbzip2)
      extracted
    case None =>
      file
  }
}
testMapping(aligner: String, paired: Boolean, chunks: Int, + skipMarkDuplicate: Boolean, + skipFlexiprep: Boolean, + zipped: Boolean) = { val map = ConfigUtils.mergeMaps(Map("output_dir" -> MappingTest.outputDir, "aligner" -> aligner, "number_chunks" -> chunks, @@ -57,15 +62,20 @@ class MappingTest extends TestNGSuite with Matchers { ), Map(MappingTest.executables.toSeq: _*)) val mapping: Mapping = initPipeline(map) - mapping.input_R1 = new File(mapping.outputDir, "bla_R1.fq") - if (paired) mapping.input_R2 = Some(new File(mapping.outputDir, "bla_R2.fq")) + if (zipped) { + mapping.input_R1 = new File(mapping.outputDir, "bla_R1.fq.gz") + if (paired) mapping.input_R2 = Some(new File(mapping.outputDir, "bla_R2.fq.gz")) + } else { + mapping.input_R1 = new File(mapping.outputDir, "bla_R1.fq") + if (paired) mapping.input_R2 = Some(new File(mapping.outputDir, "bla_R2.fq")) + } mapping.sampleId = Some("1") mapping.libId = Some("1") mapping.script() //Flexiprep mapping.functions.count(_.isInstanceOf[Fastqc]) shouldBe (if (skipFlexiprep) 0 else if (paired) 4 else 2) - mapping.functions.count(_.isInstanceOf[Zcat]) shouldBe 0 + mapping.functions.count(_.isInstanceOf[Zcat]) shouldBe (if (!zipped || (chunks > 1 && skipFlexiprep)) 0 else if (paired) 2 else 1) mapping.functions.count(_.isInstanceOf[Seqstat]) shouldBe ((if (skipFlexiprep) 0 else if (paired) 4 else 2) * chunks) mapping.functions.count(_.isInstanceOf[SeqtkSeq]) shouldBe ((if (skipFlexiprep) 0 else if (paired) 2 else 1) * chunks) mapping.functions.count(_.isInstanceOf[Cutadapt]) shouldBe ((if (skipFlexiprep) 0 else if (paired) 2 else 1) * chunks)