Skip to content
Snippets Groups Projects
Commit 7d751299 authored by Sander Bollen's avatar Sander Bollen
Browse files

Merge branch 'fix-flexiprep_noext' into 'develop'

Update filename + extension retrieval in Flexiprep

Two changes:

* Refactored the retrieval of the uncompressed filename and its extension. Extension trimming is now case-insensitive (both ".gzip" and ".GZIP" are trimmed).

* Added a clearer error message when the input file doesn't have any extension.

See merge request !256
parents f535473f 954e0245
No related branches found
No related tags found
No related merge requests found
......@@ -47,4 +47,26 @@ object IoUtils {
}
}
}
/** Possible compression extensions to trim from input files (each including its leading dot). */
val zipExtensions = Set(".gz", ".gzip", ".bz2", ".bzip2", ".bz", ".xz", ".zip")

/**
 * Given a file object and a set of compression extensions, return the file's name (without its
 * directory part) with one trailing compression extension removed.
 *
 * Matching ignores case for both the file name and the given extensions and does not depend on the
 * JVM default locale. When several extensions match, the longest one is removed, so the result does
 * not depend on the iteration order of `exts`. A name that consists solely of an extension
 * (e.g. ".gz") is returned unchanged rather than being reduced to an empty string.
 *
 * Examples:
 *  - my_file.fq.gz returns "my_file.fq"
 *  - MY_FILE.FQ.GZIP returns "MY_FILE.FQ"
 *  - my_other_file.fastq returns "my_other_file.fastq"
 *
 * @param f Input file object.
 * @param exts Possible compression extensions to trim.
 * @return Filename without compression extension.
 */
def getUncompressedFileName(f: File, exts: Set[String] = zipExtensions): String = {
  val name = f.getName
  // regionMatches(ignoreCase = true, ...) compares character by character without consulting the
  // default locale, unlike String.toLowerCase (which maps "I" to a dotless "ı" under e.g. tr-TR).
  val matchedLengths = exts.collect {
    case ext if ext.length < name.length &&
      name.regionMatches(true, name.length - ext.length, ext, 0, ext.length) => ext.length
  }
  if (matchedLengths.isEmpty) name
  else name.dropRight(matchedLengths.max)
}
}
......@@ -19,7 +19,9 @@ import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.{ Zcat, Gzip }
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.IoUtils._
import nl.lumc.sasc.biopet.extensions.tools.{ SeqStat, FastqSync }
import org.broadinstitute.gatk.queue.QScript
class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
......@@ -53,8 +55,6 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
def summarySettings = Map("skip_trim" -> skipTrim, "skip_clip" -> skipClip, "paired" -> paired)
var paired: Boolean = input_R2.isDefined
var R1_ext: String = _
var R2_ext: String = _
var R1_name: String = _
var R2_name: String = _
......@@ -85,21 +85,10 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
inputFiles :+= new InputFile(input_R1)
input_R2.foreach(inputFiles :+= new InputFile(_))
if (input_R1.endsWith(".gz")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gz"))
else if (input_R1.endsWith(".gzip")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gzip"))
else R1_name = input_R1.getName
R1_ext = R1_name.substring(R1_name.lastIndexOf("."), R1_name.length)
R1_name = R1_name.substring(0, R1_name.lastIndexOf(R1_ext))
input_R2 match {
case Some(fileR2) =>
paired = true
if (fileR2.endsWith(".gz")) R2_name = fileR2.getName.substring(0, fileR2.getName.lastIndexOf(".gz"))
else if (fileR2.endsWith(".gzip")) R2_name = fileR2.getName.substring(0, fileR2.getName.lastIndexOf(".gzip"))
else R2_name = fileR2.getName
R2_ext = R2_name.substring(R2_name.lastIndexOf("."), R2_name.length)
R2_name = R2_name.substring(0, R2_name.lastIndexOf(R2_ext))
case _ =>
R1_name = getUncompressedFileName(input_R1)
input_R2.foreach { fileR2 =>
paired = true
R2_name = getUncompressedFileName(fileR2)
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment