Commit a61296b2 authored by Peter van 't Hof
Browse files

Added MarkDuplicates to summary

parent d1016462
......@@ -17,9 +17,10 @@ package nl.lumc.sasc.biopet.extensions.picard
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.summary.Summarizable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument }
class MarkDuplicates(val root: Configurable) extends Picard {
class MarkDuplicates(val root: Configurable) extends Picard with Summarizable {
javaMainClass = "picard.sam.MarkDuplicates"
@Input(doc = "The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true)
......@@ -91,6 +92,22 @@ class MarkDuplicates(val root: Configurable) extends Picard {
optional("SORTING_COLLECTION_SIZE_RATIO=", sortingCollectionSizeRatio, spaceSeparated = false) +
optional("READ_NAME_REGEX=", readNameRegex, spaceSeparated = false) +
optional("OPTICAL_DUPLICATE_PIXEL_DISTANCE=", opticalDuplicatePixelDistance, spaceSeparated = false)
/** Files to register in the summary; this wrapper contributes none. */
def summaryFiles: Map[String, File] = Map.empty[String, File]
/**
 * Stats for the summary, read from the metrics file in `outputMetrics`.
 *
 * The result has one entry per content row returned by `Picard.getMetrics`, keyed by the
 * row's first field. Each value maps a lower-cased header name to the field found in the
 * same column of that row; column 0 is left out because it already serves as the key.
 *
 * @return nested map of the form `row key -> (header name -> field)`
 */
def summaryStats: Map[String, Any] = {
  val (columnNames, rows) = Picard.getMetrics(outputMetrics)
  rows.map { row =>
    // Header and row may differ in length; only columns present in both are reported.
    val columnLimit = math.min(columnNames.size, row.size)
    val fields = (1 until columnLimit).map { col =>
      columnNames(col).toLowerCase -> row(col)
    }.toMap
    row(0) -> fields
  }.toMap
}
}
object MarkDuplicates {
def apply(root: Configurable, input: List[File], outputDir: String): MarkDuplicates = {
......
## htsjdk.samtools.metrics.StringHeader
# picard.sam.MarkDuplicates INPUT=[/data/DIV5/SASC/project-049-SNPtypingbac/analysis/runs/sp/samples/8080_2#43/lib_8080_1/8080_2#43-8080_1.bam] OUTPUT=/data/DIV5/SASC/project-049-SNPtypingbac/analysis/runs/sp/samples/8080_2#43/lib_8080_1/8080_2#43-8080_1.dedup.bam METRICS_FILE=/data/DIV5/SASC/project-049-SNPtypingbac/analysis/runs/sp/samples/8080_2#43/lib_8080_1/8080_2#43-8080_1.dedup.metrics TMP_DIR=[/data/DIV5/SASC/project-049-SNPtypingbac/analysis/runs/sp/.queue/tmp] CREATE_INDEX=true PROGRAM_RECORD_ID=MarkDuplicates PROGRAM_GROUP_NAME=MarkDuplicates REMOVE_DUPLICATES=false ASSUME_SORTED=false MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=50000 MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=8000 SORTING_COLLECTION_SIZE_RATIO=0.25 READ_NAME_REGEX=[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).* OPTICAL_DUPLICATE_PIXEL_DISTANCE=100 VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_MD5_FILE=false
## htsjdk.samtools.metrics.StringHeader
# Started on: Wed Feb 18 17:32:02 CET 2015
## METRICS CLASS picard.sam.DuplicationMetrics
LIBRARY UNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED UNMAPPED_READS UNPAIRED_READ_DUPLICATES READ_PAIR_DUPLICATES READ_PAIR_OPTICAL_DUPLICATES PERCENT_DUPLICATION ESTIMATED_LIBRARY_SIZE
8080_1 5238 603803 115660 1077 5760 26 0.010386 31586584
## HISTOGRAM java.lang.Double
BIN VALUE
1.0 1.000043
2.0 1.98115
3.0 2.943681
4.0 3.887988
5.0 4.814414
6.0 5.723299
7.0 6.614976
8.0 7.489769
9.0 8.347998
10.0 9.189977
11.0 10.016015
12.0 10.826412
13.0 11.621464
14.0 12.401463
15.0 13.166693
16.0 13.917434
17.0 14.653961
18.0 15.376541
19.0 16.085441
20.0 16.780918
21.0 17.463226
22.0 18.132615
23.0 18.78933
24.0 19.433611
25.0 20.065693
26.0 20.685806
27.0 21.294179
28.0 21.891032
29.0 22.476584
30.0 23.051049
31.0 23.614637
32.0 24.167554
33.0 24.710002
34.0 25.24218
35.0 25.76428
36.0 26.276495
37.0 26.779012
38.0 27.272014
39.0 27.755681
40.0 28.230191
41.0 28.695716
42.0 29.152426
43.0 29.60049
44.0 30.040069
45.0 30.471325
46.0 30.894416
47.0 31.309496
48.0 31.716716
49.0 32.116227
50.0 32.508172
51.0 32.892697
52.0 33.269941
53.0 33.640042
54.0 34.003135
55.0 34.359354
56.0 34.708828
57.0 35.051684
58.0 35.388049
59.0 35.718046
60.0 36.041794
61.0 36.359412
62.0 36.671016
63.0 36.97672
64.0 37.276636
65.0 37.570873
66.0 37.859539
67.0 38.14274
68.0 38.420578
69.0 38.693155
70.0 38.960572
71.0 39.222925
72.0 39.480311
73.0 39.732823
74.0 39.980554
75.0 40.223595
76.0 40.462034
77.0 40.695958
78.0 40.925453
79.0 41.150602
80.0 41.371489
81.0 41.588193
82.0 41.800794
83.0 42.00937
84.0 42.213996
85.0 42.414748
86.0 42.611699
87.0 42.804921
88.0 42.994484
89.0 43.180458
90.0 43.362911
91.0 43.541909
92.0 43.717518
93.0 43.889802
94.0 44.058824
95.0 44.224645
96.0 44.387327
97.0 44.546929
98.0 44.703508
99.0 44.857123
100.0 45.00783
package nl.lumc.sasc.biopet.extensions.picard
import java.io.File
import java.nio.file.Paths
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
/**
 * Smoke test for the summary statistics of [[MarkDuplicates]].
 *
 * Created by pjvan_thof on 2/19/15.
 */
class MarkDuplicatesTest extends TestNGSuite with Matchers {

  /**
   * Points a job at the bundled `/picard.dedup.metrics` resource and calls `summaryStats`.
   * The test passes when the call completes without throwing.
   */
  @Test
  def summaryData: Unit = {
    val metricsFile = Paths.get(getClass.getResource("/picard.dedup.metrics").toURI).toFile
    val markDuplicates = new MarkDuplicates(null)
    markDuplicates.outputMetrics = metricsFile
    // NOTE(review): the result is discarded, so nothing about its content is verified yet.
    markDuplicates.summaryStats
  }
}
\ No newline at end of file
......@@ -96,7 +96,14 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
/** Summary files of this module: an empty map, so none are reported. */
def summaryFiles = Map()
def summarySettings = Map()
/**
 * Settings of this mapping run that are recorded in the summary, keyed by setting name.
 *
 * NOTE(review): the key "numberChunks" is camelCase while the other multi-word keys are
 * snake_case; renaming it would change the summary output, so confirm with consumers first.
 */
def summarySettings = Map(
"skip_metrics" -> skipMetrics,
"skip_flexiprep" -> skipFlexiprep,
"skip_markduplicates" -> skipMarkduplicates,
"aligner" -> aligner,
"chunking" -> chunking,
// Reported as 1 when `numberChunks` holds no value.
"numberChunks" -> numberChunks.getOrElse(1)
)
def init() {
require(outputDir != null, "Missing output directory on mapping module")
......@@ -208,7 +215,9 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
var bamFile = bamFiles.head
if (!skipMarkduplicates) {
bamFile = new File(outputDir, outputName + ".dedup.bam")
add(MarkDuplicates(this, bamFiles, bamFile))
val md = MarkDuplicates(this, bamFiles, bamFile)
add(md)
addSummarizable(md, "mark_duplicates")
} else if (skipMarkduplicates && chunking) {
val mergeSamFile = MergeSamFiles(this, bamFiles, outputDir)
add(mergeSamFile)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment