Commit 2a2a37fd authored by Peter van 't Hof
Browse files

Function to get encoding of the fastq file, replaces python script

parent 096d8a0e
package nl.lumc.sasc.biopet.function.fastq
import java.io.File
import scala.io.Source
import scala.sys.process._
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
......@@ -32,8 +33,10 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction {
/**
 * Hook run after the graph is built: checks the executable and, when no
 * contaminants file was set, defaults it to
 * `Contaminants/contaminant_list.txt` in the directory of the executable.
 */
override def afterGraph {
  this.checkExecutable
  // Derive the default path only when it is actually needed, so an explicitly
  // configured contaminants file never triggers parsing of the executable path.
  if (contaminants == null) {
    // NOTE(review): substring throws if `executable` contains no "/" —
    // presumably checkExecutable yields a full path; confirm.
    val fastqcDir = executable.substring(0, executable.lastIndexOf("/"))
    contaminants = new File(fastqcDir + "/Contaminants/contaminant_list.txt")
  }
}
/** Command line that asks the executable for its version (`<executable> --version`). */
override def versionCommand = s"$executable --version"
......@@ -51,6 +54,29 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction {
required("-o", output.getParent()) +
required(fastqfile)
}
/**
 * Reads one module block from the `fastqc_data.txt` report that belongs to
 * `output`.
 *
 * The report is split on `>>END_MODULE` markers; the leading `##FastQC`
 * version line is dropped from the first block before matching.
 *
 * @param name module name; the first block whose header starts with `>>` + name is selected
 * @return the lines of the selected block (header line included), or `null`
 *         when the data file does not exist or no block matches
 */
def getDataBlock(name: String): Array[String] = { // Based on Fastqc v0.10.1
  // FastQC is run with `-o output.getParent`, so the report directory is resolved
  // next to the zip file instead of against the JVM working directory.
  // Assumes the report is unpacked into `<zip name without .zip>/` — TODO confirm.
  // A null parent (bare file name) falls back to a relative path, as before.
  val outputDir = new File(output.getParentFile, output.getName.stripSuffix(".zip"))
  val dataFile = new File(outputDir, "fastqc_data.txt")
  if (!dataFile.exists) null
  else {
    val source = Source.fromFile(dataFile)
    // Close the underlying reader even if reading fails.
    val data = try source.mkString finally source.close()
    data.split(">>END_MODULE\n").iterator
      .map { block =>
        // The first block starts with the "##FastQC <version>" line; skip it.
        if (block.startsWith("##FastQC")) block.substring(block.indexOf("\n") + 1) else block
      }
      .find(_.startsWith(">>" + name))
      .map(_.split("\n"))
      .orNull
  }
}
/**
 * Looks up the quality encoding reported in the "Basic Statistics" module.
 *
 * @return the text following `Encoding<TAB>` on the first line that starts
 *         with "Encoding", or `null` when the module or that line is missing
 */
def getEncoding: String =
  Option(getDataBlock("Basic Statistics"))
    .flatMap(_.find(_.startsWith("Encoding")))
    .map(_.stripPrefix("Encoding\t"))
    .orNull // Could be default Sanger with a warning in the log
}
object Fastqc {
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment