From a9beb86776bca66eedc95646ce957b638cefcd6e Mon Sep 17 00:00:00 2001 From: bow <bow@bow.web.id> Date: Sat, 27 Jun 2015 17:32:11 +0200 Subject: [PATCH] Add missing stats display in Gentrap report --- .../pipelines/gentrap/scripts/pdf_report.py | 72 ++++++++++--------- .../gentrap/templates/pdf/sample_mapping.tex | 4 +- 2 files changed, 41 insertions(+), 35 deletions(-) diff --git a/public/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/scripts/pdf_report.py b/public/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/scripts/pdf_report.py index e0931c4b3..b0c2b2e82 100755 --- a/public/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/scripts/pdf_report.py +++ b/public/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/scripts/pdf_report.py @@ -418,6 +418,8 @@ class GentrapLib(object): self.fastqc_r2_qc = FastQC(self.fastqc_r2_qc_files["fastqc_data"]["path"]) # mapping metrics settings self.aln_metrics = summary.get("bammetrics", {}).get("stats", {}).get("CollectAlignmentSummaryMetrics", {}) + for k, v in self.aln_metrics.items(): + self.aln_metrics[k] = {a.lower(): b for a, b in v.items()} # insert size metrics files self.inserts_metrics_files = \ summary.get("bammetrics", {}).get("files", {}).get("multi_metrics", {}) @@ -428,25 +430,26 @@ class GentrapLib(object): if "metrics" in _rmetrics: _rmetrics = _rmetrics["metrics"] if _rmetrics: - self.rna_metrics = {k: v for k, v in _rmetrics.items() } - pf_bases = float(_rmetrics["PF_BASES"]) - exonic_bases = int(_rmetrics.get("CODING_BASES", 0)) + int(_rmetrics.get("UTR_BASES", 0)) + _rmetrics = {k.lower(): v for k, v in _rmetrics.items() } + self.rna_metrics = _rmetrics + pf_bases = float(_rmetrics["pf_bases"]) + exonic_bases = int(_rmetrics.get("coding_bases", 0)) + int(_rmetrics.get("utr_bases", 0)) # picard uses pct_ but it's actually ratio ~ we follow their convention - pct_exonic_bases_all = exonic_bases / float(_rmetrics["PF_BASES"]) - pct_exonic_bases = exonic_bases / float(_rmetrics.get("PF_ALIGNED_BASES", 0)) + pct_exonic_bases_all = exonic_bases / float(_rmetrics["pf_bases"]) + pct_exonic_bases = exonic_bases / float(_rmetrics.get("pf_aligned_bases", 0)) self.rna_metrics.update({ - "EXONIC_BASES": exonic_bases, - "PCT_EXONIC_BASES_ALL": pct_exonic_bases_all, - "PCT_EXONIC_BASES": pct_exonic_bases, - "PCT_ALIGNED_BASES": 1.0, - "PCT_ALIGNED_BASES_ALL": float(_rmetrics.get("PF_ALIGNED_BASES", 0.0)) / pf_bases, - "PCT_CODING_BASES_ALL": float(_rmetrics.get("CODING_BASES", 0.0)) / pf_bases, - "PCT_UTR_BASES_ALL": float(_rmetrics.get("UTR_BASES", 0.0)) / pf_bases, - "PCT_INTRONIC_BASES_ALL": float(_rmetrics.get("INTRONIC_BASES", 0.0)) / pf_bases, - "PCT_INTERGENIC_BASES_ALL": float(_rmetrics.get("INTERGENIC_BASES", 0.0)) / pf_bases, + "exonic_bases": exonic_bases, + "pct_exonic_bases_all": pct_exonic_bases_all, + "pct_exonic_bases": pct_exonic_bases, + "pct_aligned_bases": 1.0, + "pct_aligned_bases_all": float(_rmetrics.get("pf_aligned_bases", 0.0)) / pf_bases, + "pct_coding_bases_all": float(_rmetrics.get("coding_bases", 0.0)) / pf_bases, + "pct_utr_bases_all": float(_rmetrics.get("utr_bases", 0.0)) / pf_bases, + "pct_intronic_bases_all": float(_rmetrics.get("intronic_bases", 0.0)) / pf_bases, + "pct_intergenic_bases_all": float(_rmetrics.get("intergenic_bases", 0.0)) / pf_bases, }) - if _rmetrics.get("RIBOSOMAL_BASES", "") != "": - self.rna_metrics["PCT_RIBOSOMAL_BASES_ALL"] = float(_rmetrics.get("PF_RIBOSOMAL_BASES", 0.0)) / pf_bases + if _rmetrics.get("ribosomal_bases", "") != "": + self.rna_metrics["pct_ribosomal_bases_all"] = float(_rmetrics.get("pf_ribosomal_bases", 0.0)) / pf_bases def __repr__(self): return "{0}(sample=\"{1}\", lib=\"{2}\")".format( @@ -463,9 +466,11 @@ class GentrapSample(object): self.is_paired_end = summary.get("gentrap", {}).get("stats", {}).get("pipeline", {})["all_paired"] # mapping metrics settings self.aln_metrics = summary.get("bammetrics", {}).get("stats", {}).get("CollectAlignmentSummaryMetrics", {}) + for k, v in self.aln_metrics.items(): + self.aln_metrics[k] = {a.lower(): b for a, b in v.items()} # insert size metrics files self.inserts_metrics_files = \ - summary.get("bammetrics", {}).get("files", {}).get("CollectInsertSizeMetrics", {}).get("metrics", {}) + summary.get("bammetrics", {}).get("files", {}).get("multi_metrics", {}) # rna metrics files and stats self.rna_metrics_files = summary.get("bammetrics", {}).get("files", {}).get("rna", {}) _rmetrics = summary.get("bammetrics", {}).get("stats", {}).get("rna", {}) @@ -473,25 +478,26 @@ class GentrapSample(object): if "metrics" in _rmetrics: _rmetrics = _rmetrics["metrics"] if _rmetrics: - self.rna_metrics = {k: v for k, v in _rmetrics.items() } - pf_bases = float(_rmetrics["PF_BASES"]) - exonic_bases = int(_rmetrics.get("CODING_BASES", 0)) + int(_rmetrics.get("UTR_BASES", 0)) + _rmetrics = {k.lower(): v for k, v in _rmetrics.items() } + self.rna_metrics = _rmetrics + pf_bases = float(_rmetrics["pf_bases"]) + exonic_bases = int(_rmetrics.get("coding_bases", 0)) + int(_rmetrics.get("utr_bases", 0)) # picard uses pct_ but it's actually ratio ~ we follow their convention - pct_exonic_bases_all = exonic_bases / float(_rmetrics["PF_BASES"]) - pct_exonic_bases = exonic_bases / float(_rmetrics.get("PF_ALIGNED_BASES", 0)) + pct_exonic_bases_all = exonic_bases / float(_rmetrics["pf_bases"]) + pct_exonic_bases = exonic_bases / float(_rmetrics.get("pf_aligned_bases", 0)) self.rna_metrics.update({ - "EXONIC_BASES": exonic_bases, - "PCT_EXONIC_BASES_ALL": pct_exonic_bases_all, - "PCT_EXONIC_BASES": pct_exonic_bases, - "PCT_ALIGNED_BASES": 1.0, - "PCT_ALIGNED_BASES_ALL": float(_rmetrics.get("PF_ALIGNED_BASES", 0.0)) / pf_bases, - "PCT_CODING_BASES_ALL": float(_rmetrics.get("CODING_BASES", 0.0)) / pf_bases, - "PCT_UTR_BASES_ALL": float(_rmetrics.get("UTR_BASES", 0.0)) / pf_bases, - "PCT_INTRONIC_BASES_ALL": float(_rmetrics.get("INTRONIC_BASES", 0.0)) / pf_bases, - "PCT_INTERGENIC_BASES_ALL": float(_rmetrics.get("INTERGENIC_BASES", 0.0)) / pf_bases, + "exonic_bases": exonic_bases, + "pct_exonic_bases_all": pct_exonic_bases_all, + "pct_exonic_bases": pct_exonic_bases, + "pct_aligned_bases": 1.0, + "pct_aligned_bases_all": float(_rmetrics.get("pf_aligned_bases", 0.0)) / pf_bases, + "pct_coding_bases_all": float(_rmetrics.get("coding_bases", 0.0)) / pf_bases, + "pct_utr_bases_all": float(_rmetrics.get("utr_bases", 0.0)) / pf_bases, + "pct_intronic_bases_all": float(_rmetrics.get("intronic_bases", 0.0)) / pf_bases, + "pct_intergenic_bases_all": float(_rmetrics.get("intergenic_bases", 0.0)) / pf_bases, }) - if _rmetrics.get("RIBOSOMAL_BASES", "") != "": - self.rna_metrics["PCT_RIBOSOMAL_BASES_ALL"] = float(_rmetrics.get("PF_RIBOSOMAL_BASES", 0.0)) / pf_bases + if _rmetrics.get("ribosomal_bases", "") != "": + self.rna_metrics["pct_ribosomal_bases_all"] = float(_rmetrics.get("pf_ribosomal_bases", 0.0)) / pf_bases self.lib_names = sorted(summary["libraries"].keys()) self.libs = \ diff --git a/public/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/templates/pdf/sample_mapping.tex b/public/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/templates/pdf/sample_mapping.tex index da99f47d2..4f4da850d 100644 --- a/public/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/templates/pdf/sample_mapping.tex +++ b/public/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/templates/pdf/sample_mapping.tex @@ -47,11 +47,11 @@ % inferred insert size distribution \subsubsection{Insert size distribution} -\IfFileExists{((( sample.inserts_metrics_files.output_histogram.path )))} +\IfFileExists{((( sample.inserts_metrics_files.insert_size_histogram.path )))} { \begin{figure}[h!] \centering - \includegraphics[width=0.7\textwidth]{((( sample.inserts_metrics_files.output_histogram.path )))} + \includegraphics[width=0.7\textwidth]{((( sample.inserts_metrics_files.insert_size_histogram.path )))} \caption{Distribution of insert size length of paired-end reads mapped to opposite strands.} \end{figure} } -- GitLab