Commit 7d751299 authored by Sander Bollen
Browse files

Merge branch 'fix-flexiprep_noext' into 'develop'

Update filename + extension retrieval in Flexiprep

Two changes:

* Refactored the uncompressed filename + extension retrieval. Extension trimming is now also case-insensitive (both ".gzip" and ".GZIP" are trimmed).

* Added a clearer error message when the input file doesn't have any extension.

See merge request !256
parents f535473f 954e0245
......@@ -47,4 +47,26 @@ object IoUtils {
}
}
}
/**
 * Possible compression extensions to trim from input files.
 *
 * NOTE(review): ".bz2" is not listed although ".bz" and ".bzip2" are -- confirm whether that is intentional.
 */
val zipExtensions = Set(".gz", ".gzip", ".bzip2", ".bz", ".xz", ".zip")
/**
 * Given a file object and a set of compression extensions, return the file name with its trailing compression
 * extension removed.
 *
 * Matching is case-insensitive for both the file name and the entries of `exts`. At most one extension is
 * removed -- the longest one the name ends with -- so the result does not depend on the iteration order of `exts`.
 * A name that ends with none of the extensions is returned unchanged.
 *
 * Examples:
 * - my_file.fq.gz returns "my_file.fq"
 * - my_file.fq.GZIP returns "my_file.fq"
 * - my_other_file.fastq returns "my_other_file.fastq"
 *
 * @param f Input file object; only its name (not its parent path) is used.
 * @param exts Possible compression extensions to trim, including the leading dot.
 * @return Filename without compression extension.
 */
def getUncompressedFileName(f: File, exts: Set[String] = zipExtensions): String = {
  val fname = f.getName
  // regionMatches(ignoreCase = true, ...) compares character by character, so the outcome depends neither on the
  // JVM default locale (unlike toLowerCase) nor on the case used in `exts`. A negative offset (extension longer
  // than the name) simply yields false.
  def hasSuffix(ext: String): Boolean =
    ext.nonEmpty && fname.regionMatches(true, fname.length - ext.length, ext, 0, ext.length)

  val matchedLengths = exts.filter(hasSuffix).map(_.length)
  // Strip only the outermost extension; pick the longest match to stay deterministic.
  if (matchedLengths.isEmpty) fname
  else fname.dropRight(matchedLengths.max)
}
}
......@@ -19,7 +19,9 @@ import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.{ Zcat, Gzip }
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.IoUtils._
import nl.lumc.sasc.biopet.extensions.tools.{ SeqStat, FastqSync }
import org.broadinstitute.gatk.queue.QScript
class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
......@@ -53,8 +55,6 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
/**
 * Exposes the current skipTrim, skipClip and paired values under the keys "skip_trim", "skip_clip" and "paired".
 * NOTE(review): presumably consumed by the summary machinery of SummaryQScript -- confirm.
 */
def summarySettings = Map("skip_trim" -> skipTrim, "skip_clip" -> skipClip, "paired" -> paired)
/** Initialised to whether input_R2 is defined; set to true again later when an R2 file is present. */
var paired: Boolean = input_R2.isDefined
var R1_ext: String = _
var R2_ext: String = _
var R1_name: String = _
var R2_name: String = _
......@@ -85,21 +85,10 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
inputFiles :+= new InputFile(input_R1)
input_R2.foreach(inputFiles :+= new InputFile(_))
if (input_R1.endsWith(".gz")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gz"))
else if (input_R1.endsWith(".gzip")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gzip"))
else R1_name = input_R1.getName
R1_ext = R1_name.substring(R1_name.lastIndexOf("."), R1_name.length)
R1_name = R1_name.substring(0, R1_name.lastIndexOf(R1_ext))
input_R2 match {
case Some(fileR2) =>
paired = true
if (fileR2.endsWith(".gz")) R2_name = fileR2.getName.substring(0, fileR2.getName.lastIndexOf(".gz"))
else if (fileR2.endsWith(".gzip")) R2_name = fileR2.getName.substring(0, fileR2.getName.lastIndexOf(".gzip"))
else R2_name = fileR2.getName
R2_ext = R2_name.substring(R2_name.lastIndexOf("."), R2_name.length)
R2_name = R2_name.substring(0, R2_name.lastIndexOf(R2_ext))
case _ =>
R1_name = getUncompressedFileName(input_R1)
input_R2.foreach { fileR2 =>
paired = true
R2_name = getUncompressedFileName(fileR2)
}
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment