From 2a2a37fd6d5c5c420ee90e917dec807ac453387e Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Sun, 27 Jul 2014 12:16:13 +0200 Subject: [PATCH] Function to get encoding of the fastq file, replaces python script --- .../sasc/biopet/function/fastq/Fastqc.scala | 30 +++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Fastqc.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Fastqc.scala index ca30614d1..d0ce249e0 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Fastqc.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Fastqc.scala @@ -1,6 +1,7 @@ package nl.lumc.sasc.biopet.function.fastq import java.io.File +import scala.io.Source import scala.sys.process._ import org.broadinstitute.gatk.utils.commandline.{ Input, Output } @@ -32,8 +33,10 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction { override def afterGraph { this.checkExecutable - val fastqcDir = executable.substring(0, executable.lastIndexOf("/")) - if (contaminants == null) contaminants = new File(fastqcDir + "/Contaminants/contaminant_list.txt") + if (contaminants == null) { + val fastqcDir = executable.substring(0, executable.lastIndexOf("/")) + contaminants = new File(fastqcDir + "/Contaminants/contaminant_list.txt") + } } override def versionCommand = executable + " --version" @@ -51,6 +54,29 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction { required("-o", output.getParent()) + required(fastqfile) } + + def getDataBlock(name:String): Array[String] = { // Based on Fastqc v0.10.1 + val outputDir = output.getName.stripSuffix(".zip") + val dataFile = new File(outputDir + "/fastqc_data.txt") + if (!dataFile.exists) return null + val data = Source.fromFile(dataFile).mkString + for (block <- data.split(">>END_MODULE\n")) { + val b = if (block.startsWith("##FastQC")) block.substring(block.indexOf("\n") + 1) else block + if (b.startsWith(">>" + name)) + return for (line <- b.split("\n")) + yield line + } + return null + } + + def getEncoding: String = { + val block = getDataBlock("Basic Statistics") + if (block == null) return null + for (line <- block + if (line.startsWith("Encoding"))) + return line.stripPrefix("Encoding\t") + return null // Could be default Sanger with a warning in the log + } } object Fastqc { -- GitLab