Skip to content
Snippets Groups Projects
Commit 06c874ae authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Merge with develop

parents 627f9574 911948ed
No related branches found
No related tags found
No related merge requests found
......@@ -47,4 +47,26 @@ object IoUtils {
}
}
}
/** Possible compression extensions to trim from input files (includes ".bz2", the usual bzip2 suffix). */
val zipExtensions: Set[String] = Set(".gz", ".gzip", ".bzip2", ".bz", ".bz2", ".xz", ".zip")

/**
 * Given a file object and a set of compression extensions, return the filename with its trailing compression
 * extension removed.
 *
 * Matching is case-insensitive on both the filename and the extensions. At most one extension is removed; when
 * several candidates match (one being a suffix of another), the longest one is trimmed, so the result never
 * depends on the iteration order of the set.
 *
 * Examples:
 *  - my_file.fq.gz returns "my_file.fq"
 *  - my_other_file.fastq returns "my_other_file.fastq"
 *
 * @param f Input file object.
 * @param exts Possible compression extensions to trim.
 * @return Filename without compression extension.
 */
def getUncompressedFileName(f: File, exts: Set[String] = zipExtensions): String = {
  val name = f.getName
  // regionMatches compares case-insensitively without a locale-dependent toLowerCase and simply returns
  // false when the extension is longer than the name (negative offset).
  val matchedLengths = exts.collect {
    case ext if name.regionMatches(true, name.length - ext.length, ext, 0, ext.length) => ext.length
  }
  if (matchedLengths.isEmpty) name
  else name.dropRight(matchedLengths.max)
}
}
......@@ -19,7 +19,9 @@ import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.{ Zcat, Gzip }
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.IoUtils._
import nl.lumc.sasc.biopet.extensions.tools.{ SeqStat, FastqSync }
import org.broadinstitute.gatk.queue.QScript
class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
......@@ -53,8 +55,6 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
def summarySettings = Map("skip_trim" -> skipTrim, "skip_clip" -> skipClip, "paired" -> paired)
var paired: Boolean = input_R2.isDefined
var R1_ext: String = _
var R2_ext: String = _
var R1_name: String = _
var R2_name: String = _
......@@ -85,21 +85,10 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
inputFiles :+= new InputFile(input_R1)
input_R2.foreach(inputFiles :+= new InputFile(_))
if (input_R1.endsWith(".gz")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gz"))
else if (input_R1.endsWith(".gzip")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gzip"))
else R1_name = input_R1.getName
R1_ext = R1_name.substring(R1_name.lastIndexOf("."), R1_name.length)
R1_name = R1_name.substring(0, R1_name.lastIndexOf(R1_ext))
input_R2 match {
case Some(fileR2) =>
paired = true
if (fileR2.endsWith(".gz")) R2_name = fileR2.getName.substring(0, fileR2.getName.lastIndexOf(".gz"))
else if (fileR2.endsWith(".gzip")) R2_name = fileR2.getName.substring(0, fileR2.getName.lastIndexOf(".gzip"))
else R2_name = fileR2.getName
R2_ext = R2_name.substring(R2_name.lastIndexOf("."), R2_name.length)
R2_name = R2_name.substring(0, R2_name.lastIndexOf(R2_ext))
case _ =>
R1_name = getUncompressedFileName(input_R1)
input_R2.foreach { fileR2 =>
paired = true
R2_name = getUncompressedFileName(fileR2)
}
}
......@@ -214,6 +203,7 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
qcCmdR2.beforeCmd()
fqSync.beforeCmd()
commands = qcCmdR1.jobs ::: qcCmdR2.jobs ::: fqSync :: Nil
commands.foreach(addPipeJob)
super.beforeCmd()
}
}
......
......@@ -3,7 +3,7 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep
import java.io.File
import nl.lumc.sasc.biopet.core.summary.{ SummaryQScript, Summarizable }
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, BiopetCommandLineFunction, BiopetPipe }
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, BiopetCommandLineFunction }
import nl.lumc.sasc.biopet.extensions.{ Cat, Gzip, Sickle, Cutadapt }
import nl.lumc.sasc.biopet.extensions.seqtk.SeqtkSeq
import nl.lumc.sasc.biopet.utils.config.Configurable
......@@ -73,18 +73,20 @@ class QcCommand(val root: Configurable, val fastqc: Fastqc) extends BiopetComman
case _ => None
}
if (seqtk.Q.isDefined) seqtk.V = true
addPipeJob(seqtk)
clip = if (!flexiprep.skipClip) {
val foundAdapters = fastqc.foundAdapters.map(_.seq)
if (foundAdapters.nonEmpty) {
val cutadept = new Cutadapt(root)
cutadept.fastq_input = seqtk.output
cutadept.fastq_output = new File(output.getParentFile, input.getName + ".cutadept.fq")
cutadept.stats_output = new File(flexiprep.outputDir, s"${flexiprep.sampleId.getOrElse("x")}-${flexiprep.libId.getOrElse("x")}.$read.clip.stats")
if (cutadept.default_clip_mode == "3") cutadept.opt_adapter ++= foundAdapters
else if (cutadept.default_clip_mode == "5") cutadept.opt_front ++= foundAdapters
else if (cutadept.default_clip_mode == "both") cutadept.opt_anywhere ++= foundAdapters
Some(cutadept)
val cutadapt = new Cutadapt(root)
cutadapt.fastq_input = seqtk.output
cutadapt.fastq_output = new File(output.getParentFile, input.getName + ".cutadapt.fq")
cutadapt.stats_output = new File(flexiprep.outputDir, s"${flexiprep.sampleId.getOrElse("x")}-${flexiprep.libId.getOrElse("x")}.$read.clip.stats")
if (cutadapt.default_clip_mode == "3") cutadapt.opt_adapter ++= foundAdapters
else if (cutadapt.default_clip_mode == "5") cutadapt.opt_front ++= foundAdapters
else if (cutadapt.default_clip_mode == "both") cutadapt.opt_anywhere ++= foundAdapters
addPipeJob(cutadapt)
Some(cutadapt)
} else None
} else None
......@@ -92,17 +94,18 @@ class QcCommand(val root: Configurable, val fastqc: Fastqc) extends BiopetComman
val sickle = new Sickle(root)
sickle.output_stats = new File(flexiprep.outputDir, s"${flexiprep.sampleId.getOrElse("x")}-${flexiprep.libId.getOrElse("x")}.$read.trim.stats")
sickle.input_R1 = clip match {
case Some(clip) => clip.fastq_output
case _ => seqtk.output
case Some(c) => c.fastq_output
case _ => seqtk.output
}
sickle.output_R1 = new File(output.getParentFile, input.getName + ".sickle.fq")
addPipeJob(sickle)
Some(sickle)
} else None
val outputFile = (clip, trim) match {
case (_, Some(trim)) => trim.output_R1
case (Some(clip), _) => clip.fastq_output
case _ => seqtk.output
case (_, Some(t)) => t.output_R1
case (Some(c), _) => c.fastq_output
case _ => seqtk.output
}
if (compress) outputCommand = {
......@@ -131,10 +134,10 @@ class QcCommand(val root: Configurable, val fastqc: Fastqc) extends BiopetComman
def cmdLine = {
val cmd = (clip, trim) match {
case (Some(clip), Some(trim)) => new BiopetFifoPipe(root, seqtk :: clip :: trim :: outputCommand :: Nil)
case (Some(clip), _) => new BiopetFifoPipe(root, seqtk :: clip :: outputCommand :: Nil)
case (_, Some(trim)) => new BiopetFifoPipe(root, seqtk :: trim :: outputCommand :: Nil)
case _ => new BiopetFifoPipe(root, seqtk :: outputCommand :: Nil)
case (Some(c), Some(t)) => new BiopetFifoPipe(root, seqtk :: c :: t :: outputCommand :: Nil)
case (Some(c), _) => new BiopetFifoPipe(root, seqtk :: c :: outputCommand :: Nil)
case (_, Some(t)) => new BiopetFifoPipe(root, seqtk :: t :: outputCommand :: Nil)
case _ => new BiopetFifoPipe(root, seqtk :: outputCommand :: Nil)
}
//val cmds = (Some(seqtk) :: clip :: trim :: Some(new Gzip(root)) :: Nil).flatten
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment