diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/IoUtils.scala b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/IoUtils.scala index b9392c40decdb3ce837e0d06e663fcd24fbb9be5..c046e951f9ab605cc3cecfc4f1775ad053a7472a 100644 --- a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/IoUtils.scala +++ b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/IoUtils.scala @@ -47,4 +47,26 @@ object IoUtils { } } } + + /** Possible compression extensions (lower case, with leading dot) to trim from input file names. */ + val zipExtensions = Set(".gz", ".gzip", ".bz2", ".bzip2", ".bz", ".xz", ".zip") + + /** + * Given a file object and a set of compression extensions, return the filename without any of the compression + * extensions. The name is lower-cased for the comparison only; the returned name keeps its original case. + * + * Examples: + * - my_file.fq.gz returns "my_file.fq" + * - my_other_file.fastq returns "my_other_file.fastq" + * + * @param f Input file object. + * @param exts Lower-case compression extensions to trim; defaults to [[zipExtensions]]. + * @return Filename without compression extension, or the unchanged filename when no extension matches. + */ + def getUncompressedFileName(f: File, exts: Set[String] = zipExtensions): String = + exts.foldLeft(f.getName) { + (fname, ext) => + if (fname.toLowerCase.endsWith(ext)) fname.dropRight(ext.length) + else fname + } } diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala index 9182b3141d55db1714a358cca25a1273466686d1..1f1f2510cc600544c95d8db117bad3c3e888be01 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala @@ -19,7 +19,9 @@ import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, SampleLibraryTag } import nl.lumc.sasc.biopet.extensions.{ Zcat, Gzip } import nl.lumc.sasc.biopet.utils.config.Configurable +import 
nl.lumc.sasc.biopet.utils.IoUtils._ import nl.lumc.sasc.biopet.extensions.tools.{ SeqStat, FastqSync } + import org.broadinstitute.gatk.queue.QScript class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag { @@ -53,8 +55,6 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with def summarySettings = Map("skip_trim" -> skipTrim, "skip_clip" -> skipClip, "paired" -> paired) var paired: Boolean = input_R2.isDefined - var R1_ext: String = _ - var R2_ext: String = _ var R1_name: String = _ var R2_name: String = _ @@ -85,21 +85,10 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with inputFiles :+= new InputFile(input_R1) input_R2.foreach(inputFiles :+= new InputFile(_)) - if (input_R1.endsWith(".gz")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gz")) - else if (input_R1.endsWith(".gzip")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gzip")) - else R1_name = input_R1.getName - R1_ext = R1_name.substring(R1_name.lastIndexOf("."), R1_name.length) - R1_name = R1_name.substring(0, R1_name.lastIndexOf(R1_ext)) - - input_R2 match { - case Some(fileR2) => - paired = true - if (fileR2.endsWith(".gz")) R2_name = fileR2.getName.substring(0, fileR2.getName.lastIndexOf(".gz")) - else if (fileR2.endsWith(".gzip")) R2_name = fileR2.getName.substring(0, fileR2.getName.lastIndexOf(".gzip")) - else R2_name = fileR2.getName - R2_ext = R2_name.substring(R2_name.lastIndexOf("."), R2_name.length) - R2_name = R2_name.substring(0, R2_name.lastIndexOf(R2_ext)) - case _ => + R1_name = getUncompressedFileName(input_R1) + input_R2.foreach { fileR2 => + paired = true + R2_name = getUncompressedFileName(fileR2) } }