From eb8aef9357f1cd2c7e3b8c45af1d20ba98897786 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Wed, 18 Feb 2015 11:47:34 +0100 Subject: [PATCH] Fix picard summaries --- .../pipelines/bammetrics/BamMetrics.scala | 1 + .../CollectAlignmentSummaryMetrics.scala | 14 +- .../picard/CollectInsertSizeMetrics.scala | 2 +- .../test/resources/picard.alignmentMetrics | 12 + .../test/resources/picard.insertsizemetrics | 717 ++++++++++++++++++ .../CollectAlignmentSummaryMetricsTest.scala | 23 + .../picard/CollectInsertSizeMetricsTest.scala | 23 + 7 files changed, 786 insertions(+), 6 deletions(-) create mode 100644 public/biopet-framework/src/test/resources/picard.alignmentMetrics create mode 100644 public/biopet-framework/src/test/resources/picard.insertsizemetrics create mode 100644 public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetricsTest.scala create mode 100644 public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetricsTest.scala diff --git a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala index 506e42337..cbbf91da7 100644 --- a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala +++ b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala @@ -83,6 +83,7 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit add(BedToInterval(this, baitBedFile, inputBam, outputDir), true) for (bedFile <- bedFiles) { + //TODO: Add target jobs to summary val targetDir = new File(outputDir, bedFile.getName.stripSuffix(".bed")) val targetInterval = BedToInterval(this, bedFile, inputBam, targetDir) add(targetInterval, true) diff --git 
a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala index 8fad20529..f69114521 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala @@ -66,11 +66,15 @@ class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard with def summaryData: Map[String, Any] = { val (header, content) = Picard.getMetrics(output) - (for (category <- 0 to content.size) yield { - content(category)(0) -> (for (i <- 1 to header.size) yield { - header(i).toLowerCase -> content(category)(i) - }).toMap - }).toMap + (for (category <- 0 until content.size) yield { + content(category)(0) -> ( + for ( + i <- 1 until header.size if i < content(category).size + ) yield { + header(i).toLowerCase -> content(category)(i) + }).toMap + } + ).toMap } } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala index f83298822..9585491f9 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala @@ -75,7 +75,7 @@ class CollectInsertSizeMetrics(val root: Configurable) extends Picard with Summa def summaryData: Map[String, Any] = { val (header, content) = Picard.getMetrics(output) - (for (i <- 0 to header.size) + (for (i <- 0 until header.size if i < content.head.size) /* exclusive bound: header(header.size) is out of range; the guard skips header columns the data row has no value for */ yield (header(i).toLowerCase -> content.head(i))).toMap } } diff --git 
a/public/biopet-framework/src/test/resources/picard.alignmentMetrics b/public/biopet-framework/src/test/resources/picard.alignmentMetrics new file mode 100644 index 000000000..2f05f08c4 --- /dev/null +++ b/public/biopet-framework/src/test/resources/picard.alignmentMetrics @@ -0,0 +1,12 @@ +## htsjdk.samtools.metrics.StringHeader +# picard.analysis.CollectAlignmentSummaryMetrics INPUT=/data/DIV5/SASC/project-049-SNPtypingbac/analysis/runs/sp/samples/8080_2#43/lib_8080_2/8080_2#43-8080_2.dedup.bam OUTPUT=/data/DIV5/SASC/project-049-SNPtypingbac/analysis/runs/sp/samples/8080_2#43/lib_8080_2/metrics/8080_2#43-8080_2.dedup.alignmentMetrics REFERENCE_SEQUENCE=/home/cwknetsch/pipeline_pilot/input/reference/M120_23032012.fasta ASSUME_SORTED=true TMP_DIR=[/data/DIV5/SASC/project-049-SNPtypingbac/analysis/runs/sp/.queue/tmp] CREATE_INDEX=true MAX_INSERT_SIZE=100000 ADAPTER_SEQUENCE=[AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG, AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTTG, AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNNNATCTCGTATGCCGTCTTCTGCTTG] METRIC_ACCUMULATION_LEVEL=[ALL_READS] IS_BISULFITE_SEQUENCED=false STOP_AFTER=0 VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_MD5_FILE=false +## htsjdk.samtools.metrics.StringHeader +# Started on: Tue Feb 17 15:40:01 CET 2015 + +## METRICS CLASS picard.analysis.AlignmentSummaryMetrics +CATEGORY TOTAL_READS PF_READS PCT_PF_READS PF_NOISE_READS PF_READS_ALIGNED PCT_PF_READS_ALIGNED PF_ALIGNED_BASES PF_HQ_ALIGNED_READS PF_HQ_ALIGNED_BASES PF_HQ_ALIGNED_Q20_BASES PF_HQ_MEDIAN_MISMATCHES PF_MISMATCH_RATE PF_HQ_ERROR_RATE PF_INDEL_RATE MEAN_READ_LENGTH READS_ALIGNED_IN_PAIRS PCT_READS_ALIGNED_IN_PAIRS BAD_CYCLES STRAND_BALANCE PCT_CHIMERAS PCT_ADAPTER SAMPLE LIBRARY READ_GROUP +FIRST_OF_PAIR 664252 
664252 1 0 606507 0.913068 60008919 594828 58868522 58661158 0 0.000976 0.000962 0.00003 99.071709 603803 0.995542 0 0.499488 0.027347 0.000056 +SECOND_OF_PAIR 664252 664252 1 0 606337 0.912812 60044312 594744 58913706 58680340 0 0.000842 0.00083 0.000034 99.128317 603803 0.995821 0 0.500456 0.027347 0.000033 +PAIR 1328504 1328504 1 0 1212844 0.91294 120053231 1189572 117782228 117341498 0 0.000909 0.000896 0.000032 99.100013 1207606 0.995681 0 0.499972 0.027347 0.000044 + + diff --git a/public/biopet-framework/src/test/resources/picard.insertsizemetrics b/public/biopet-framework/src/test/resources/picard.insertsizemetrics new file mode 100644 index 000000000..ae5131591 --- /dev/null +++ b/public/biopet-framework/src/test/resources/picard.insertsizemetrics @@ -0,0 +1,717 @@ +## htsjdk.samtools.metrics.StringHeader +# picard.analysis.CollectInsertSizeMetrics HISTOGRAM_FILE=/data/DIV5/SASC/project-049-SNPtypingbac/analysis/runs/sp/samples/8080_2#43/lib_8080_2/metrics/8080_2#43-8080_2.dedup.insertsizemetrics.pdf INPUT=/data/DIV5/SASC/project-049-SNPtypingbac/analysis/runs/sp/samples/8080_2#43/lib_8080_2/8080_2#43-8080_2.dedup.bam OUTPUT=/data/DIV5/SASC/project-049-SNPtypingbac/analysis/runs/sp/samples/8080_2#43/lib_8080_2/metrics/8080_2#43-8080_2.dedup.insertsizemetrics REFERENCE_SEQUENCE=/home/cwknetsch/pipeline_pilot/input/reference/M120_23032012.fasta ASSUME_SORTED=true TMP_DIR=[/data/DIV5/SASC/project-049-SNPtypingbac/analysis/runs/sp/.queue/tmp] CREATE_INDEX=true DEVIATIONS=10.0 MINIMUM_PCT=0.05 METRIC_ACCUMULATION_LEVEL=[ALL_READS] STOP_AFTER=0 VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_MD5_FILE=false +## htsjdk.samtools.metrics.StringHeader +# Started on: Tue Feb 17 15:40:01 CET 2015 + +## METRICS CLASS picard.analysis.InsertSizeMetrics +MEDIAN_INSERT_SIZE MEDIAN_ABSOLUTE_DEVIATION MIN_INSERT_SIZE MAX_INSERT_SIZE MEAN_INSERT_SIZE STANDARD_DEVIATION READ_PAIRS PAIR_ORIENTATION WIDTH_OF_10_PERCENT 
WIDTH_OF_20_PERCENT WIDTH_OF_30_PERCENT WIDTH_OF_40_PERCENT WIDTH_OF_50_PERCENT WIDTH_OF_60_PERCENT WIDTH_OF_70_PERCENT WIDTH_OF_80_PERCENT WIDTH_OF_90_PERCENT WIDTH_OF_99_PERCENT SAMPLE LIBRARY READ_GROUP +249 50 19 4010275 269.721498 88.166884 585513 FR 21 41 59 81 101 123 151 191 297 849 + +## HISTOGRAM java.lang.Integer +insert_size All_Reads.fr_count +19 51 +20 21 +21 4 +22 4 +23 1 +27 4 +28 1 +29 1 +31 1 +32 3 +34 2 +35 2 +36 4 +37 1 +40 2 +43 1 +46 1 +47 2 +49 1 +53 1 +54 1 +57 4 +58 1 +59 4 +62 2 +64 3 +66 4 +70 1 +71 3 +73 1 +74 1 +75 2 +77 3 +78 3 +79 3 +80 2 +81 5 +82 1 +83 4 +84 5 +85 3 +86 8 +87 4 +88 1 +89 4 +90 3 +91 8 +92 15 +93 3 +94 3 +95 2 +96 2 +97 10 +98 2 +99 7 +100 2 +101 6 +102 9 +103 2 +104 8 +105 9 +106 8 +107 7 +108 5 +109 14 +110 10 +111 14 +112 18 +113 15 +114 10 +115 13 +116 10 +117 29 +118 21 +119 37 +120 24 +121 37 +122 53 +123 48 +124 45 +125 54 +126 65 +127 70 +128 79 +129 107 +130 105 +131 96 +132 144 +133 158 +134 139 +135 184 +136 186 +137 236 +138 222 +139 243 +140 286 +141 292 +142 347 +143 407 +144 429 +145 479 +146 499 +147 550 +148 571 +149 648 +150 677 +151 691 +152 787 +153 856 +154 960 +155 956 +156 1023 +157 1097 +158 1154 +159 1280 +160 1268 +161 1357 +162 1476 +163 1539 +164 1646 +165 1658 +166 1746 +167 1788 +168 1948 +169 1987 +170 2101 +171 2083 +172 2209 +173 2324 +174 2298 +175 2403 +176 2441 +177 2580 +178 2613 +179 2734 +180 2780 +181 2926 +182 2830 +183 2989 +184 2961 +185 3077 +186 3098 +187 3041 +188 3250 +189 3144 +190 3257 +191 3286 +192 3238 +193 3378 +194 3553 +195 3403 +196 3454 +197 3443 +198 3452 +199 3457 +200 3633 +201 3664 +202 3616 +203 3589 +204 3547 +205 3541 +206 3635 +207 3644 +208 3634 +209 3699 +210 3656 +211 3645 +212 3623 +213 3584 +214 3627 +215 3586 +216 3588 +217 3543 +218 3536 +219 3596 +220 3562 +221 3670 +222 3494 +223 3563 +224 3511 +225 3414 +226 3439 +227 3404 +228 3580 +229 3443 +230 3325 +231 3394 +232 3384 +233 3378 +234 3308 +235 3404 +236 3362 +237 3295 +238 3302 +239 3170 
+240 3151 +241 3232 +242 3233 +243 3139 +244 3058 +245 3152 +246 3152 +247 3118 +248 2958 +249 2994 +250 3070 +251 2923 +252 2960 +253 2980 +254 2875 +255 2808 +256 2821 +257 2802 +258 2809 +259 2684 +260 2766 +261 2756 +262 2686 +263 2687 +264 2640 +265 2559 +266 2590 +267 2574 +268 2529 +269 2543 +270 2484 +271 2520 +272 2391 +273 2442 +274 2490 +275 2339 +276 2325 +277 2335 +278 2331 +279 2282 +280 2334 +281 2114 +282 2164 +283 2151 +284 2166 +285 2192 +286 2146 +287 2108 +288 2131 +289 2094 +290 2055 +291 2042 +292 1941 +293 2110 +294 2000 +295 1979 +296 1923 +297 1955 +298 1881 +299 1946 +300 1936 +301 1846 +302 1830 +303 1806 +304 1818 +305 1756 +306 1746 +307 1717 +308 1726 +309 1762 +310 1610 +311 1635 +312 1577 +313 1625 +314 1615 +315 1536 +316 1521 +317 1602 +318 1579 +319 1546 +320 1493 +321 1453 +322 1523 +323 1518 +324 1451 +325 1406 +326 1387 +327 1331 +328 1450 +329 1297 +330 1342 +331 1278 +332 1356 +333 1374 +334 1285 +335 1250 +336 1270 +337 1298 +338 1293 +339 1216 +340 1265 +341 1204 +342 1244 +343 1250 +344 1212 +345 1126 +346 1170 +347 1181 +348 1120 +349 1087 +350 1106 +351 1050 +352 1037 +353 1038 +354 1004 +355 1067 +356 1037 +357 1025 +358 1021 +359 1030 +360 992 +361 993 +362 979 +363 888 +364 909 +365 905 +366 899 +367 946 +368 919 +369 892 +370 893 +371 871 +372 897 +373 894 +374 870 +375 832 +376 824 +377 863 +378 785 +379 833 +380 811 +381 776 +382 788 +383 775 +384 761 +385 755 +386 761 +387 725 +388 711 +389 650 +390 664 +391 731 +392 733 +393 725 +394 667 +395 661 +396 659 +397 651 +398 740 +399 688 +400 679 +401 595 +402 606 +403 607 +404 603 +405 605 +406 567 +407 617 +408 582 +409 551 +410 554 +411 572 +412 578 +413 505 +414 557 +415 549 +416 516 +417 517 +418 509 +419 526 +420 529 +421 486 +422 526 +423 503 +424 479 +425 455 +426 479 +427 483 +428 446 +429 431 +430 435 +431 504 +432 462 +433 464 +434 447 +435 399 +436 389 +437 439 +438 414 +439 383 +440 391 +441 408 +442 389 +443 380 +444 398 +445 368 +446 378 +447 354 +448 
377 +449 354 +450 341 +451 356 +452 322 +453 336 +454 348 +455 333 +456 363 +457 315 +458 318 +459 329 +460 304 +461 323 +462 314 +463 319 +464 295 +465 299 +466 301 +467 266 +468 278 +469 278 +470 286 +471 243 +472 260 +473 248 +474 261 +475 297 +476 256 +477 240 +478 215 +479 264 +480 213 +481 256 +482 264 +483 230 +484 275 +485 220 +486 244 +487 226 +488 206 +489 193 +490 240 +491 184 +492 217 +493 223 +494 213 +495 214 +496 200 +497 198 +498 191 +499 207 +500 190 +501 183 +502 205 +503 203 +504 174 +505 181 +506 193 +507 197 +508 177 +509 185 +510 179 +511 165 +512 167 +513 177 +514 183 +515 133 +516 165 +517 153 +518 145 +519 143 +520 135 +521 133 +522 147 +523 139 +524 111 +525 150 +526 152 +527 142 +528 138 +529 152 +530 128 +531 127 +532 127 +533 143 +534 108 +535 107 +536 115 +537 122 +538 128 +539 104 +540 119 +541 128 +542 108 +543 116 +544 130 +545 98 +546 107 +547 95 +548 105 +549 108 +550 102 +551 107 +552 104 +553 110 +554 85 +555 101 +556 77 +557 90 +558 86 +559 100 +560 86 +561 84 +562 90 +563 68 +564 72 +565 93 +566 72 +567 83 +568 89 +569 81 +570 74 +571 69 +572 64 +573 73 +574 68 +575 69 +576 81 +577 75 +578 67 +579 67 +580 73 +581 79 +582 72 +583 56 +584 73 +585 62 +586 51 +587 47 +588 62 +589 56 +590 60 +591 49 +592 57 +593 41 +594 67 +595 58 +596 48 +597 53 +598 55 +599 49 +600 54 +601 61 +602 57 +603 46 +604 46 +605 49 +606 47 +607 50 +608 52 +609 42 +610 36 +611 35 +612 38 +613 38 +614 42 +615 50 +616 41 +617 34 +618 47 +619 43 +620 44 +621 29 +622 44 +623 42 +624 35 +625 33 +626 19 +627 35 +628 45 +629 31 +630 37 +631 28 +632 22 +633 30 +634 31 +635 25 +636 30 +637 27 +638 21 +639 30 +640 35 +641 31 +642 30 +643 18 +644 28 +645 26 +646 26 +647 28 +648 28 +649 31 +650 19 +651 17 +652 21 +653 31 +654 25 +655 21 +656 28 +657 26 +658 22 +659 26 +660 18 +661 14 +662 11 +663 16 +664 18 +665 23 +666 12 +667 21 +668 15 +669 15 +670 9 +671 18 +672 13 +673 22 +674 17 +675 17 +676 8 +677 17 +678 16 +679 19 +680 6 +681 12 +682 15 +683 13 +684 12 +685 
14 +686 4 +687 15 +688 13 +689 15 +690 24 +691 16 +692 11 +693 13 +694 17 +695 12 +696 7 +697 10 +698 15 +699 15 +700 12 +701 12 +702 19 +703 10 +704 12 +705 5 +706 12 +707 7 +708 4 +709 11 +710 6 +711 10 +712 12 +713 12 +714 7 +715 7 +716 6 +717 6 +718 8 +719 6 +720 7 +721 11 +722 6 +723 15 +724 9 +725 5 +726 7 +727 10 +728 7 +729 8 +730 4 +731 6 +732 10 +733 7 +734 11 +735 8 +736 6 +737 3 +738 9 +739 6 +740 4 +741 5 +742 9 +743 7 +744 4 +745 6 +746 8 +747 9 +748 1 +749 7 + diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetricsTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetricsTest.scala new file mode 100644 index 000000000..51139812f --- /dev/null +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetricsTest.scala @@ -0,0 +1,23 @@ +package nl.lumc.sasc.biopet.extensions.picard + +import java.io.File +import java.nio.file.Paths + +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +/** + * Created by pjvan_thof on 2/18/15. 
+ */ +class CollectAlignmentSummaryMetricsTest extends TestNGSuite with Matchers { + + @Test + def summaryData: Unit = { + val file = new File(Paths.get(getClass.getResource("/picard.alignmentMetrics").toURI).toString) + val job = new CollectAlignmentSummaryMetrics(null) + job.output = file + + job.summaryData + } +} diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetricsTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetricsTest.scala new file mode 100644 index 000000000..fc0c563e8 --- /dev/null +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetricsTest.scala @@ -0,0 +1,23 @@ +package nl.lumc.sasc.biopet.extensions.picard + +import java.io.File +import java.nio.file.Paths + +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +/** + * Smoke test for CollectInsertSizeMetrics.summaryData: points the job's output at the bundled picard.insertsizemetrics resource and calls summaryData; the returned map is not asserted on. Created by pjvan_thof on 2/18/15. + */ +class CollectInsertSizeMetricsTest extends TestNGSuite with Matchers { + + @Test + def summaryData: Unit = { + val file = new File(Paths.get(getClass.getResource("/picard.insertsizemetrics").toURI).toString) + val job = new CollectInsertSizeMetrics(null) + job.output = file + + job.summaryData + } +} -- GitLab