From eb8aef9357f1cd2c7e3b8c45af1d20ba98897786 Mon Sep 17 00:00:00 2001
From: Peter van 't Hof <p.j.van_t_hof@lumc.nl>
Date: Wed, 18 Feb 2015 11:47:34 +0100
Subject: [PATCH] Fix index-out-of-bounds errors in picard summaries

---
 .../pipelines/bammetrics/BamMetrics.scala     |   1 +
 .../CollectAlignmentSummaryMetrics.scala      |  14 +-
 .../picard/CollectInsertSizeMetrics.scala     |   2 +-
 .../test/resources/picard.alignmentMetrics    |  12 +
 .../test/resources/picard.insertsizemetrics   | 717 ++++++++++++++++++
 .../CollectAlignmentSummaryMetricsTest.scala  |  23 +
 .../picard/CollectInsertSizeMetricsTest.scala |  23 +
 7 files changed, 786 insertions(+), 6 deletions(-)
 create mode 100644 public/biopet-framework/src/test/resources/picard.alignmentMetrics
 create mode 100644 public/biopet-framework/src/test/resources/picard.insertsizemetrics
 create mode 100644 public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetricsTest.scala
 create mode 100644 public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetricsTest.scala

diff --git a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala
index 506e42337..cbbf91da7 100644
--- a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala
+++ b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala
@@ -83,6 +83,7 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit
       add(BedToInterval(this, baitBedFile, inputBam, outputDir), true)
 
     for (bedFile <- bedFiles) {
+      //TODO: Add target jobs to summary
       val targetDir = new File(outputDir, bedFile.getName.stripSuffix(".bed"))
       val targetInterval = BedToInterval(this, bedFile, inputBam, targetDir)
       add(targetInterval, true)
diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala
index 8fad20529..f69114521 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala
@@ -66,11 +66,15 @@ class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard with
   def summaryData: Map[String, Any] = {
     val (header, content) = Picard.getMetrics(output)
 
-    (for (category <- 0 to content.size) yield {
-      content(category)(0) -> (for (i <- 1 to header.size) yield {
-        header(i).toLowerCase -> content(category)(i)
-      }).toMap
-    }).toMap
+    (for (category <- 0 until content.size) yield { // one map entry per metrics row
+      content(category)(0) -> ( // keyed by the row's first column
+        for (
+          i <- 1 until header.size if i < content(category).size // skip header columns this row has no value for
+        ) yield {
+          header(i).toLowerCase -> content(category)(i)
+        }).toMap
+    }
+    ).toMap
   }
 }
 
diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala
index f83298822..9585491f9 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala
@@ -75,7 +75,7 @@ class CollectInsertSizeMetrics(val root: Configurable) extends Picard with Summa
 
   def summaryData: Map[String, Any] = {
     val (header, content) = Picard.getMetrics(output)
-    (for (i <- 0 to header.size)
+    (for (i <- 0 until header.size if i < content.head.size) // 'until': header.size itself is not a valid index
       yield (header(i).toLowerCase -> content.head(i))).toMap
   }
 }
diff --git a/public/biopet-framework/src/test/resources/picard.alignmentMetrics b/public/biopet-framework/src/test/resources/picard.alignmentMetrics
new file mode 100644
index 000000000..2f05f08c4
--- /dev/null
+++ b/public/biopet-framework/src/test/resources/picard.alignmentMetrics
@@ -0,0 +1,12 @@
+## htsjdk.samtools.metrics.StringHeader
+# picard.analysis.CollectAlignmentSummaryMetrics INPUT=/data/DIV5/SASC/project-049-SNPtypingbac/analysis/runs/sp/samples/8080_2#43/lib_8080_2/8080_2#43-8080_2.dedup.bam OUTPUT=/data/DIV5/SASC/project-049-SNPtypingbac/analysis/runs/sp/samples/8080_2#43/lib_8080_2/metrics/8080_2#43-8080_2.dedup.alignmentMetrics REFERENCE_SEQUENCE=/home/cwknetsch/pipeline_pilot/input/reference/M120_23032012.fasta ASSUME_SORTED=true TMP_DIR=[/data/DIV5/SASC/project-049-SNPtypingbac/analysis/runs/sp/.queue/tmp] CREATE_INDEX=true    MAX_INSERT_SIZE=100000 ADAPTER_SEQUENCE=[AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG, AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTTG, AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNNNATCTCGTATGCCGTCTTCTGCTTG] METRIC_ACCUMULATION_LEVEL=[ALL_READS] IS_BISULFITE_SEQUENCED=false STOP_AFTER=0 VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_MD5_FILE=false
+## htsjdk.samtools.metrics.StringHeader
+# Started on: Tue Feb 17 15:40:01 CET 2015
+
+## METRICS CLASS	picard.analysis.AlignmentSummaryMetrics
+CATEGORY	TOTAL_READS	PF_READS	PCT_PF_READS	PF_NOISE_READS	PF_READS_ALIGNED	PCT_PF_READS_ALIGNED	PF_ALIGNED_BASES	PF_HQ_ALIGNED_READS	PF_HQ_ALIGNED_BASES	PF_HQ_ALIGNED_Q20_BASES	PF_HQ_MEDIAN_MISMATCHES	PF_MISMATCH_RATE	PF_HQ_ERROR_RATE	PF_INDEL_RATE	MEAN_READ_LENGTH	READS_ALIGNED_IN_PAIRS	PCT_READS_ALIGNED_IN_PAIRS	BAD_CYCLES	STRAND_BALANCE	PCT_CHIMERAS	PCT_ADAPTER	SAMPLE	LIBRARY	READ_GROUP
+FIRST_OF_PAIR	664252	664252	1	0	606507	0.913068	60008919	594828	58868522	58661158	0	0.000976	0.000962	0.00003	99.071709	603803	0.995542	0	0.499488	0.027347	0.000056			
+SECOND_OF_PAIR	664252	664252	1	0	606337	0.912812	60044312	594744	58913706	58680340	0	0.000842	0.00083	0.000034	99.128317	603803	0.995821	0	0.500456	0.027347	0.000033			
+PAIR	1328504	1328504	1	0	1212844	0.91294	120053231	1189572	117782228	117341498	0	0.000909	0.000896	0.000032	99.100013	1207606	0.995681	0	0.499972	0.027347	0.000044			
+
+
diff --git a/public/biopet-framework/src/test/resources/picard.insertsizemetrics b/public/biopet-framework/src/test/resources/picard.insertsizemetrics
new file mode 100644
index 000000000..ae5131591
--- /dev/null
+++ b/public/biopet-framework/src/test/resources/picard.insertsizemetrics
@@ -0,0 +1,717 @@
+## htsjdk.samtools.metrics.StringHeader
+# picard.analysis.CollectInsertSizeMetrics HISTOGRAM_FILE=/data/DIV5/SASC/project-049-SNPtypingbac/analysis/runs/sp/samples/8080_2#43/lib_8080_2/metrics/8080_2#43-8080_2.dedup.insertsizemetrics.pdf INPUT=/data/DIV5/SASC/project-049-SNPtypingbac/analysis/runs/sp/samples/8080_2#43/lib_8080_2/8080_2#43-8080_2.dedup.bam OUTPUT=/data/DIV5/SASC/project-049-SNPtypingbac/analysis/runs/sp/samples/8080_2#43/lib_8080_2/metrics/8080_2#43-8080_2.dedup.insertsizemetrics REFERENCE_SEQUENCE=/home/cwknetsch/pipeline_pilot/input/reference/M120_23032012.fasta ASSUME_SORTED=true TMP_DIR=[/data/DIV5/SASC/project-049-SNPtypingbac/analysis/runs/sp/.queue/tmp] CREATE_INDEX=true    DEVIATIONS=10.0 MINIMUM_PCT=0.05 METRIC_ACCUMULATION_LEVEL=[ALL_READS] STOP_AFTER=0 VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_MD5_FILE=false
+## htsjdk.samtools.metrics.StringHeader
+# Started on: Tue Feb 17 15:40:01 CET 2015
+
+## METRICS CLASS	picard.analysis.InsertSizeMetrics
+MEDIAN_INSERT_SIZE	MEDIAN_ABSOLUTE_DEVIATION	MIN_INSERT_SIZE	MAX_INSERT_SIZE	MEAN_INSERT_SIZE	STANDARD_DEVIATION	READ_PAIRS	PAIR_ORIENTATION	WIDTH_OF_10_PERCENT	WIDTH_OF_20_PERCENT	WIDTH_OF_30_PERCENT	WIDTH_OF_40_PERCENT	WIDTH_OF_50_PERCENT	WIDTH_OF_60_PERCENT	WIDTH_OF_70_PERCENT	WIDTH_OF_80_PERCENT	WIDTH_OF_90_PERCENT	WIDTH_OF_99_PERCENT	SAMPLE	LIBRARY	READ_GROUP
+249	50	19	4010275	269.721498	88.166884	585513	FR	21	41	59	81	101	123	151	191	297	849			
+
+## HISTOGRAM	java.lang.Integer
+insert_size	All_Reads.fr_count
+19	51
+20	21
+21	4
+22	4
+23	1
+27	4
+28	1
+29	1
+31	1
+32	3
+34	2
+35	2
+36	4
+37	1
+40	2
+43	1
+46	1
+47	2
+49	1
+53	1
+54	1
+57	4
+58	1
+59	4
+62	2
+64	3
+66	4
+70	1
+71	3
+73	1
+74	1
+75	2
+77	3
+78	3
+79	3
+80	2
+81	5
+82	1
+83	4
+84	5
+85	3
+86	8
+87	4
+88	1
+89	4
+90	3
+91	8
+92	15
+93	3
+94	3
+95	2
+96	2
+97	10
+98	2
+99	7
+100	2
+101	6
+102	9
+103	2
+104	8
+105	9
+106	8
+107	7
+108	5
+109	14
+110	10
+111	14
+112	18
+113	15
+114	10
+115	13
+116	10
+117	29
+118	21
+119	37
+120	24
+121	37
+122	53
+123	48
+124	45
+125	54
+126	65
+127	70
+128	79
+129	107
+130	105
+131	96
+132	144
+133	158
+134	139
+135	184
+136	186
+137	236
+138	222
+139	243
+140	286
+141	292
+142	347
+143	407
+144	429
+145	479
+146	499
+147	550
+148	571
+149	648
+150	677
+151	691
+152	787
+153	856
+154	960
+155	956
+156	1023
+157	1097
+158	1154
+159	1280
+160	1268
+161	1357
+162	1476
+163	1539
+164	1646
+165	1658
+166	1746
+167	1788
+168	1948
+169	1987
+170	2101
+171	2083
+172	2209
+173	2324
+174	2298
+175	2403
+176	2441
+177	2580
+178	2613
+179	2734
+180	2780
+181	2926
+182	2830
+183	2989
+184	2961
+185	3077
+186	3098
+187	3041
+188	3250
+189	3144
+190	3257
+191	3286
+192	3238
+193	3378
+194	3553
+195	3403
+196	3454
+197	3443
+198	3452
+199	3457
+200	3633
+201	3664
+202	3616
+203	3589
+204	3547
+205	3541
+206	3635
+207	3644
+208	3634
+209	3699
+210	3656
+211	3645
+212	3623
+213	3584
+214	3627
+215	3586
+216	3588
+217	3543
+218	3536
+219	3596
+220	3562
+221	3670
+222	3494
+223	3563
+224	3511
+225	3414
+226	3439
+227	3404
+228	3580
+229	3443
+230	3325
+231	3394
+232	3384
+233	3378
+234	3308
+235	3404
+236	3362
+237	3295
+238	3302
+239	3170
+240	3151
+241	3232
+242	3233
+243	3139
+244	3058
+245	3152
+246	3152
+247	3118
+248	2958
+249	2994
+250	3070
+251	2923
+252	2960
+253	2980
+254	2875
+255	2808
+256	2821
+257	2802
+258	2809
+259	2684
+260	2766
+261	2756
+262	2686
+263	2687
+264	2640
+265	2559
+266	2590
+267	2574
+268	2529
+269	2543
+270	2484
+271	2520
+272	2391
+273	2442
+274	2490
+275	2339
+276	2325
+277	2335
+278	2331
+279	2282
+280	2334
+281	2114
+282	2164
+283	2151
+284	2166
+285	2192
+286	2146
+287	2108
+288	2131
+289	2094
+290	2055
+291	2042
+292	1941
+293	2110
+294	2000
+295	1979
+296	1923
+297	1955
+298	1881
+299	1946
+300	1936
+301	1846
+302	1830
+303	1806
+304	1818
+305	1756
+306	1746
+307	1717
+308	1726
+309	1762
+310	1610
+311	1635
+312	1577
+313	1625
+314	1615
+315	1536
+316	1521
+317	1602
+318	1579
+319	1546
+320	1493
+321	1453
+322	1523
+323	1518
+324	1451
+325	1406
+326	1387
+327	1331
+328	1450
+329	1297
+330	1342
+331	1278
+332	1356
+333	1374
+334	1285
+335	1250
+336	1270
+337	1298
+338	1293
+339	1216
+340	1265
+341	1204
+342	1244
+343	1250
+344	1212
+345	1126
+346	1170
+347	1181
+348	1120
+349	1087
+350	1106
+351	1050
+352	1037
+353	1038
+354	1004
+355	1067
+356	1037
+357	1025
+358	1021
+359	1030
+360	992
+361	993
+362	979
+363	888
+364	909
+365	905
+366	899
+367	946
+368	919
+369	892
+370	893
+371	871
+372	897
+373	894
+374	870
+375	832
+376	824
+377	863
+378	785
+379	833
+380	811
+381	776
+382	788
+383	775
+384	761
+385	755
+386	761
+387	725
+388	711
+389	650
+390	664
+391	731
+392	733
+393	725
+394	667
+395	661
+396	659
+397	651
+398	740
+399	688
+400	679
+401	595
+402	606
+403	607
+404	603
+405	605
+406	567
+407	617
+408	582
+409	551
+410	554
+411	572
+412	578
+413	505
+414	557
+415	549
+416	516
+417	517
+418	509
+419	526
+420	529
+421	486
+422	526
+423	503
+424	479
+425	455
+426	479
+427	483
+428	446
+429	431
+430	435
+431	504
+432	462
+433	464
+434	447
+435	399
+436	389
+437	439
+438	414
+439	383
+440	391
+441	408
+442	389
+443	380
+444	398
+445	368
+446	378
+447	354
+448	377
+449	354
+450	341
+451	356
+452	322
+453	336
+454	348
+455	333
+456	363
+457	315
+458	318
+459	329
+460	304
+461	323
+462	314
+463	319
+464	295
+465	299
+466	301
+467	266
+468	278
+469	278
+470	286
+471	243
+472	260
+473	248
+474	261
+475	297
+476	256
+477	240
+478	215
+479	264
+480	213
+481	256
+482	264
+483	230
+484	275
+485	220
+486	244
+487	226
+488	206
+489	193
+490	240
+491	184
+492	217
+493	223
+494	213
+495	214
+496	200
+497	198
+498	191
+499	207
+500	190
+501	183
+502	205
+503	203
+504	174
+505	181
+506	193
+507	197
+508	177
+509	185
+510	179
+511	165
+512	167
+513	177
+514	183
+515	133
+516	165
+517	153
+518	145
+519	143
+520	135
+521	133
+522	147
+523	139
+524	111
+525	150
+526	152
+527	142
+528	138
+529	152
+530	128
+531	127
+532	127
+533	143
+534	108
+535	107
+536	115
+537	122
+538	128
+539	104
+540	119
+541	128
+542	108
+543	116
+544	130
+545	98
+546	107
+547	95
+548	105
+549	108
+550	102
+551	107
+552	104
+553	110
+554	85
+555	101
+556	77
+557	90
+558	86
+559	100
+560	86
+561	84
+562	90
+563	68
+564	72
+565	93
+566	72
+567	83
+568	89
+569	81
+570	74
+571	69
+572	64
+573	73
+574	68
+575	69
+576	81
+577	75
+578	67
+579	67
+580	73
+581	79
+582	72
+583	56
+584	73
+585	62
+586	51
+587	47
+588	62
+589	56
+590	60
+591	49
+592	57
+593	41
+594	67
+595	58
+596	48
+597	53
+598	55
+599	49
+600	54
+601	61
+602	57
+603	46
+604	46
+605	49
+606	47
+607	50
+608	52
+609	42
+610	36
+611	35
+612	38
+613	38
+614	42
+615	50
+616	41
+617	34
+618	47
+619	43
+620	44
+621	29
+622	44
+623	42
+624	35
+625	33
+626	19
+627	35
+628	45
+629	31
+630	37
+631	28
+632	22
+633	30
+634	31
+635	25
+636	30
+637	27
+638	21
+639	30
+640	35
+641	31
+642	30
+643	18
+644	28
+645	26
+646	26
+647	28
+648	28
+649	31
+650	19
+651	17
+652	21
+653	31
+654	25
+655	21
+656	28
+657	26
+658	22
+659	26
+660	18
+661	14
+662	11
+663	16
+664	18
+665	23
+666	12
+667	21
+668	15
+669	15
+670	9
+671	18
+672	13
+673	22
+674	17
+675	17
+676	8
+677	17
+678	16
+679	19
+680	6
+681	12
+682	15
+683	13
+684	12
+685	14
+686	4
+687	15
+688	13
+689	15
+690	24
+691	16
+692	11
+693	13
+694	17
+695	12
+696	7
+697	10
+698	15
+699	15
+700	12
+701	12
+702	19
+703	10
+704	12
+705	5
+706	12
+707	7
+708	4
+709	11
+710	6
+711	10
+712	12
+713	12
+714	7
+715	7
+716	6
+717	6
+718	8
+719	6
+720	7
+721	11
+722	6
+723	15
+724	9
+725	5
+726	7
+727	10
+728	7
+729	8
+730	4
+731	6
+732	10
+733	7
+734	11
+735	8
+736	6
+737	3
+738	9
+739	6
+740	4
+741	5
+742	9
+743	7
+744	4
+745	6
+746	8
+747	9
+748	1
+749	7
+
diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetricsTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetricsTest.scala
new file mode 100644
index 000000000..51139812f
--- /dev/null
+++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetricsTest.scala
@@ -0,0 +1,23 @@
+package nl.lumc.sasc.biopet.extensions.picard
+
+import java.io.File
+import java.nio.file.Paths
+
+import org.scalatest.Matchers
+import org.scalatest.testng.TestNGSuite
+import org.testng.annotations.Test
+
+/**
+ * Checks that summaryData can parse the picard.alignmentMetrics test resource. Created by pjvan_thof on 2/18/15.
+ */
+class CollectAlignmentSummaryMetricsTest extends TestNGSuite with Matchers {
+
+  @Test
+  def summaryData(): Unit = {
+    val file = new File(Paths.get(getClass.getResource("/picard.alignmentMetrics").toURI).toString)
+    val job = new CollectAlignmentSummaryMetrics(null)
+    job.output = file
+    // Running without an exception is not enough: the parsed summary must actually contain data.
+    assert(job.summaryData.nonEmpty)
+  }
+}
diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetricsTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetricsTest.scala
new file mode 100644
index 000000000..fc0c563e8
--- /dev/null
+++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetricsTest.scala
@@ -0,0 +1,23 @@
+package nl.lumc.sasc.biopet.extensions.picard
+
+import java.io.File
+import java.nio.file.Paths
+
+import org.scalatest.Matchers
+import org.scalatest.testng.TestNGSuite
+import org.testng.annotations.Test
+
+/**
+ * Checks that summaryData can parse the picard.insertsizemetrics test resource. Created by pjvan_thof on 2/18/15.
+ */
+class CollectInsertSizeMetricsTest extends TestNGSuite with Matchers {
+
+  @Test
+  def summaryData(): Unit = {
+    val file = new File(Paths.get(getClass.getResource("/picard.insertsizemetrics").toURI).toString)
+    val job = new CollectInsertSizeMetrics(null)
+    job.output = file
+    // Running without an exception is not enough: the parsed summary must actually contain data.
+    assert(job.summaryData.nonEmpty)
+  }
+}
-- 
GitLab