Skip to content
Snippets Groups Projects
Commit e469c3ab authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Added getSummary function

parent 48431cd5
No related branches found
No related tags found
No related merge requests found
...@@ -6,6 +6,7 @@ import org.broadinstitute.gatk.utils.commandline._ ...@@ -6,6 +6,7 @@ import org.broadinstitute.gatk.utils.commandline._
import java.io.File import java.io.File
import argonaut._, Argonaut._ import argonaut._, Argonaut._
import scalaz._, Scalaz._ import scalaz._, Scalaz._
import scala.io.Source
class Sha1sum(val root: Configurable) extends BiopetCommandLineFunction { class Sha1sum(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Zipped file") @Input(doc = "Zipped file")
...@@ -19,7 +20,10 @@ class Sha1sum(val root: Configurable) extends BiopetCommandLineFunction { ...@@ -19,7 +20,10 @@ class Sha1sum(val root: Configurable) extends BiopetCommandLineFunction {
def cmdLine = required(executable) + required(input) + " > " + required(output) def cmdLine = required(executable) + required(input) + " > " + required(output)
/**
 * Summarises this job as a JSON object.
 *
 * The whole `output` file is read and split on single spaces; the first
 * token is reported as the checksum.
 *
 * @return JSON object with the fields "path" (absolute path of `output`)
 *         and "sha1sum" (first space-separated token of its content)
 */
def getSummary: Json = {
  val source = Source.fromFile(output)
  // Source.fromFile keeps the underlying file handle open until closed explicitly.
  val data = try source.mkString.split(" ") finally source.close()
  ("path" := output.getAbsolutePath) ->:
    ("sha1sum" := data(0)) ->:
    jEmptyObject
}
} }
......
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package nl.lumc.sasc.biopet.pipelines.flexiprep package nl.lumc.sasc.biopet.pipelines.flexiprep
import scala.io.Source import scala.io.Source
...@@ -16,6 +10,7 @@ import scalaz._, Scalaz._ ...@@ -16,6 +10,7 @@ import scalaz._, Scalaz._
import java.io.File import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.core.config.Configurable
import scala.collection.mutable.Map
class Cutadapt(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Cutadapt(root) { class Cutadapt(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Cutadapt(root) {
@Input(doc = "Fastq contams file", required = false) @Input(doc = "Fastq contams file", required = false)
...@@ -55,7 +50,28 @@ class Cutadapt(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Cutada ...@@ -55,7 +50,28 @@ class Cutadapt(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Cutada
} }
/**
 * Parses `stats_output` line by line and reports the read and adapter counts found.
 *
 * Counts default to 0 and the adapter map stays empty when `stats_output`
 * does not exist or contains no matching line.
 *
 * @return JSON object with "num_reads_affected", "num_reads_discarded_too_short",
 *         "num_reads_discarded_too_long" and an "adapters" object (sequence -> count)
 */
def getSummary: Json = {
  // \d+ rather than \d*: an empty capture would make toInt throw NumberFormatException.
  val trimR = """.*Trimmed reads: *(\d+) .*""".r
  val tooShortR = """.*Too short reads: *(\d+) .*""".r
  val tooLongR = """.*Too long reads: *(\d+) .*""".r
  // Inside a character class '|' is a literal, so it is left out of the nucleotide set.
  val adapterR = """Adapter '([CTAG]*)'.*trimmed (\d+) times.""".r

  val stats: Map[String, Int] = Map("trimmed" -> 0, "tooshort" -> 0, "toolong" -> 0)
  val adapterStats: Map[String, Int] = Map()

  if (stats_output.exists) {
    val source = Source.fromFile(stats_output)
    try {
      for (line <- source.getLines) line match {
        case trimR(m)                 => stats += ("trimmed" -> m.toInt)
        case tooShortR(m)             => stats += ("tooshort" -> m.toInt)
        case tooLongR(m)              => stats += ("toolong" -> m.toInt)
        case adapterR(adapter, count) => adapterStats += (adapter -> count.toInt)
        case _                        =>
      }
    } finally source.close() // release the file handle even if parsing fails
  }

  ("num_reads_affected" := stats("trimmed")) ->:
    ("num_reads_discarded_too_short" := stats("tooshort")) ->:
    ("num_reads_discarded_too_long" := stats("toolong")) ->:
    ("adapters" := adapterStats.toMap) ->:
    jEmptyObject
}
} }
...@@ -69,6 +85,28 @@ object Cutadapt { ...@@ -69,6 +85,28 @@ object Cutadapt {
} }
/**
 * Sums several per-run summaries into one.
 *
 * A summary that lacks a field (or holds a non-number there) contributes 0
 * for that field instead of failing the whole merge.
 *
 * @param jsons summaries carrying the fields written below
 * @return JSON object with the summed read counts and the per-adapter totals
 */
def mergeSummarys(jsons: List[Json]): Json = {
  // Safe numeric lookup: absent field -> 0, no Option.get.
  def count(json: Json, key: String): Int =
    json.field(key).map(_.numberOrZero.toInt).getOrElse(0)

  val affected = jsons.map(count(_, "num_reads_affected")).sum
  val tooShort = jsons.map(count(_, "num_reads_discarded_too_short")).sum
  val tooLong = jsons.map(count(_, "num_reads_discarded_too_long")).sum

  val adapterStats: Map[String, Int] = Map()
  for (json <- jsons) {
    val adapters = json.fieldOrEmptyObject("adapters")
    for (key <- adapters.objectFieldsOrEmpty)
      adapterStats(key) = adapterStats.getOrElse(key, 0) + count(adapters, key)
  }

  ("num_reads_affected" := affected) ->:
    ("num_reads_discarded_too_short" := tooShort) ->:
    ("num_reads_discarded_too_long" := tooLong) ->:
    ("adapters" := adapterStats.toMap) ->:
    jEmptyObject
}
} }
...@@ -38,7 +38,22 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r ...@@ -38,7 +38,22 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
} }
/**
 * Lists the files expected inside the output directory as a JSON object.
 *
 * The directory is the absolute path of `output` with a trailing ".zip"
 * removed; each entry maps a name to an object holding its "path".
 *
 * @return JSON object of name -> { "path": <directory + relative file> }
 */
def getSummary: Json = {
  val dir = output.getAbsolutePath.stripSuffix(".zip") + "/"
  val subfixs = Map("plot_duplication_levels" -> "Images/duplication_levels.png",
    "plot_kmer_profiles" -> "Images/kmer_profiles.png",
    "plot_per_base_gc_content" -> "Images/per_base_gc_content.png",
    "plot_per_base_n_content" -> "Images/per_base_n_content.png",
    "plot_per_base_quality" -> "Images/per_base_quality.png",
    "plot_per_base_sequence_content" -> "Images/per_base_sequence_content.png",
    "plot_per_sequence_gc_content" -> "Images/per_sequence_gc_content.png",
    "plot_per_sequence_quality" -> "Images/per_sequence_quality.png",
    "plot_sequence_length_distribution" -> "Images/sequence_length_distribution.png",
    "fastqc_data" -> "fastqc_data.txt")
  val outputMap: Map[String, Map[String, String]] =
    subfixs.map { case (name, relPath) => name -> Map("path" -> (dir + relPath)) }
  // Encode the map by storing it under a throw-away key and reading it back.
  val wrapped = ("" := outputMap) ->: jEmptyObject
  wrapped.fieldOrEmptyObject("")
}
} }
......
...@@ -288,6 +288,8 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript { ...@@ -288,6 +288,8 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript {
for ((k, v) <- outputFiles) summarize.deps +:= v for ((k, v) <- outputFiles) summarize.deps +:= v
add(summarize) add(summarize)
} }
add(summary)
} }
def extractIfNeeded(file: File, runDir: String): File = { def extractIfNeeded(file: File, runDir: String): File = {
......
...@@ -5,14 +5,54 @@ import nl.lumc.sasc.biopet.core.config.Configurable ...@@ -5,14 +5,54 @@ import nl.lumc.sasc.biopet.core.config.Configurable
import argonaut._, Argonaut._ import argonaut._, Argonaut._
import scalaz._, Scalaz._ import scalaz._, Scalaz._
import scala.io.Source
import scala.collection.mutable.Map
/** Flexiprep wrapper around the Sickle extension that adds a JSON summary. */
class Sickle(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Sickle(root) {
  /**
   * Parses `output_stats` line by line and collects the kept/discarded counts.
   *
   * Only lines that match one of the patterns below in full contribute; the
   * result is an empty object when `output_stats` does not exist.
   *
   * @return JSON object with whichever "num_*" counters were found
   */
  def getSummary: Json = {
    // \d+ rather than \d*: an empty capture would make toInt throw NumberFormatException.
    val pairKept = """FastQ paired records kept: (\d+) \((\d+) pairs\)""".r
    val singleKept = """FastQ single records kept: (\d+) \(from PE1: (\d+), from PE2: (\d+)\)""".r
    val pairDiscarded = """FastQ paired records discarded: (\d+) \((\d+) pairs\)""".r
    val singleDiscarded = """FastQ single records discarded: (\d+) \(from PE1: (\d+), from PE2: (\d+)\)""".r

    val stats: Map[String, Int] = Map()

    if (output_stats.exists) {
      val source = Source.fromFile(output_stats)
      try {
        for (line <- source.getLines) line match {
          case pairKept(reads, _) =>
            stats += ("num_paired_reads_kept" -> reads.toInt)
          case singleKept(_, r1, r2) =>
            stats += ("num_reads_kept_R1" -> r1.toInt)
            stats += ("num_reads_kept_R2" -> r2.toInt)
          case pairDiscarded(reads, _) =>
            stats += ("num_paired_reads_discarded" -> reads.toInt)
          case singleDiscarded(_, r1, r2) =>
            stats += ("num_reads_discarded_R1" -> r1.toInt)
            stats += ("num_reads_discarded_R2" -> r2.toInt)
          case _ =>
        }
      } finally source.close() // release the file handle even if parsing fails
    }

    // Encode the map by storing it under a throw-away key and reading it back.
    val temp = ("" := stats.toMap) ->: jEmptyObject
    temp.fieldOrEmptyObject("")
  }
}
object Sickle {
  /**
   * Adds up the numeric top-level fields of all given summaries, key by key.
   * Fields that are not numbers are ignored.
   *
   * @param jsons summaries to combine
   * @return JSON object mapping each numeric field name to its sum
   */
  def mergeSummarys(jsons: List[Json]): Json = {
    val sums: Map[String, Int] = Map()
    for {
      summary <- jsons
      name <- summary.objectFieldsOrEmpty
      value <- summary.field(name)
      if value.isNumber
    } sums(name) = sums.getOrElse(name, 0) + value.numberOrZero.toInt

    // Encode the map by storing it under a throw-away key and reading it back.
    val wrapped = ("" := sums.toMap) ->: jEmptyObject
    wrapped.fieldOrEmptyObject("")
  }
}
\ No newline at end of file
...@@ -10,6 +10,8 @@ import scalaz._, Scalaz._ ...@@ -10,6 +10,8 @@ import scalaz._, Scalaz._
import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.PythonCommandLineFunction import nl.lumc.sasc.biopet.extensions.PythonCommandLineFunction
import scala.io.Source
class FastqSync(val root: Configurable) extends PythonCommandLineFunction { class FastqSync(val root: Configurable) extends PythonCommandLineFunction {
setPythonScript("sync_paired_end_reads.py") setPythonScript("sync_paired_end_reads.py")
...@@ -43,7 +45,27 @@ class FastqSync(val root: Configurable) extends PythonCommandLineFunction { ...@@ -43,7 +45,27 @@ class FastqSync(val root: Configurable) extends PythonCommandLineFunction {
} }
/**
 * Parses `output_stats` line by line and reports the filtered/kept read counts.
 *
 * Every count stays 0 when `output_stats` does not exist or no line matches.
 *
 * @return JSON object with "num_reads_discarded_R1", "num_reads_discarded_R2"
 *         and "num_reads_kept"
 */
def getSummary: Json = {
  // \d+ rather than \d*: an empty capture would make toInt throw NumberFormatException.
  val R1_filteredR = """Filtered (\d+) reads from first read file.""".r
  val R2_filteredR = """Filtered (\d+) reads from second read file.""".r
  val readsLeftR = """Synced read files contain (\d+) reads.""".r

  var R1_filtered = 0
  var R2_filtered = 0
  var readsLeft = 0

  if (output_stats.exists) {
    val source = Source.fromFile(output_stats)
    try {
      for (line <- source.getLines) line match {
        case R1_filteredR(m) => R1_filtered = m.toInt
        case R2_filteredR(m) => R2_filtered = m.toInt
        case readsLeftR(m)   => readsLeft = m.toInt
        case _               =>
      }
    } finally source.close() // release the file handle even if parsing fails
  }

  ("num_reads_discarded_R1" := R1_filtered) ->:
    ("num_reads_discarded_R2" := R2_filtered) ->:
    ("num_reads_kept" := readsLeft) ->:
    jEmptyObject
}
} }
...@@ -61,6 +83,19 @@ object FastqSync { ...@@ -61,6 +83,19 @@ object FastqSync {
} }
/**
 * Sums several per-run summaries into one.
 *
 * A summary that lacks a field (or holds a non-number there) contributes 0
 * for that field instead of failing the whole merge.
 *
 * @param jsons summaries carrying the three fields written below
 * @return JSON object with the summed "num_reads_discarded_R1",
 *         "num_reads_discarded_R2" and "num_reads_kept"
 */
def mergeSummarys(jsons: List[Json]): Json = {
  // Safe numeric lookup: absent field -> 0, no Option.get.
  def total(key: String): Int =
    jsons.map(_.field(key).map(_.numberOrZero.toInt).getOrElse(0)).sum

  ("num_reads_discarded_R1" := total("num_reads_discarded_R1")) ->:
    ("num_reads_discarded_R2" := total("num_reads_discarded_R2")) ->:
    ("num_reads_kept" := total("num_reads_kept")) ->:
    jEmptyObject
}
} }
\ No newline at end of file
...@@ -6,6 +6,8 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output } ...@@ -6,6 +6,8 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import argonaut._, Argonaut._ import argonaut._, Argonaut._
import scalaz._, Scalaz._ import scalaz._, Scalaz._
import scala.io.Source
import scala.collection.mutable.Map
import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.PythonCommandLineFunction import nl.lumc.sasc.biopet.extensions.PythonCommandLineFunction
...@@ -30,7 +32,9 @@ class Seqstat(val root: Configurable) extends PythonCommandLineFunction { ...@@ -30,7 +32,9 @@ class Seqstat(val root: Configurable) extends PythonCommandLineFunction {
} }
/**
 * Reads `out` as JSON and returns its "stats" object.
 *
 * @return the "stats" field (an empty object when that field is absent),
 *         or `jNull` when the file content is not valid JSON
 */
def getSummary: Json = {
  val source = Source.fromFile(out)
  // Source.fromFile keeps the underlying file handle open until closed explicitly.
  val content = try source.mkString finally source.close()
  Parse.parseOption(content).map(_.fieldOrEmptyObject("stats")).getOrElse(jNull)
}
} }
...@@ -44,6 +48,64 @@ object Seqstat { ...@@ -44,6 +48,64 @@ object Seqstat {
} }
/**
 * Combines several summaries into one.
 *
 * Numeric leaves under "bases" and "reads" are summed per key, except
 * "len_min" (minimum kept) and "len_max" (maximum kept). Counters that never
 * occurred are reported as 0, and "qual_encoding" falls back to an empty
 * string when `jsons` is empty, so the merge no longer throws on empty or
 * incomplete input.
 *
 * @param jsons summaries with optional "bases", "reads" and "qual_encoding" fields
 * @return JSON object with the merged "bases", "reads" and "qual_encoding"
 */
def mergeSummarys(jsons: List[Json]): Json = {
  // Folds every numeric leaf of `json` into `total`, recursing into nested objects.
  def addJson(json: Json, total: Map[String, Int]): Unit = {
    for (key <- json.objectFieldsOrEmpty; value <- json.field(key)) {
      if (value.isObject) addJson(value, total)
      else if (value.isNumber) {
        val number = value.numberOrZero.toInt
        total(key) = total.get(key) match {
          case None                              => number
          case Some(current) if key == "len_min" => math.min(current, number)
          case Some(current) if key == "len_max" => math.max(current, number)
          case Some(current)                     => current + number
        }
      }
    }
  }

  // Absent counter -> 0 rather than NoSuchElementException.
  def value(total: Map[String, Int], key: String): Int = total.getOrElse(key, 0)

  // Keys of the nested quality objects, in the order they are written out.
  val qualThresholds = List("1", "10", "20", "30", "40", "50", "60")
  def qualJson(total: Map[String, Int]): Json =
    qualThresholds.foldRight(jEmptyObject) { (key, acc) => (key := value(total, key)) ->: acc }

  val basesTotal: Map[String, Int] = Map()
  val readsTotal: Map[String, Int] = Map()
  var encoding: Set[Json] = Set()
  for (json <- jsons) {
    encoding += json.fieldOrEmptyString("qual_encoding")
    addJson(json.fieldOrEmptyObject("bases"), basesTotal)
    addJson(json.fieldOrEmptyObject("reads"), readsTotal)
  }

  ("bases" := (
    ("num_n" := value(basesTotal, "num_n")) ->:
    ("num_total" := value(basesTotal, "num_total")) ->:
    ("num_qual_gte" := qualJson(basesTotal)) ->:
    jEmptyObject)) ->:
    ("reads" := (
      ("num_with_n" := value(readsTotal, "num_with_n")) ->:
      ("num_total" := value(readsTotal, "num_total")) ->:
      ("len_min" := value(readsTotal, "len_min")) ->:
      ("len_max" := value(readsTotal, "len_max")) ->:
      ("num_mean_qual_gte" := qualJson(readsTotal)) ->:
      jEmptyObject)) ->:
    // headOption: `head` throws on an empty set, i.e. when `jsons` is empty.
    ("qual_encoding" := encoding.headOption.getOrElse(jEmptyString)) ->:
    jEmptyObject
}
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment