Commit 3396fedd authored by van den Berg

Add picard insertSize metrics to stats.json

This also required reordering some Snakemake rules to make sure that the
correct input files are available. When using rule-based inputs, the rules
in the Snakefile have to be ordered so that such inputs only refer to rules
that occur earlier in the Snakefile.
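As a minimal sketch of that constraint (rule names here are placeholders, not taken from this pipeline), a rule may use rules.<name>.output only for rules that are already defined above it:

rule make_report:
    output: "report.json"
    shell: "generate_report > {output}"

# OK: make_report is defined above, so rules.make_report.output exists here.
# Defining summarise before make_report would raise an error when the
# Snakefile is evaluated.
rule summarise:
    input: rules.make_report.output
    output: "summary.tsv"
    shell: "summarise {input} > {output}"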
parent 583aed74
@@ -437,26 +437,6 @@ rule collectstats:
"--cutadapt {input.cutadapt} "
"{input.cov} > {output}"
rule merge_stats:
"""Merge all stats of all samples"""
input:
cols = expand("{sample}/{sample}.stats.json",
sample=config['samples']),
mpy = config["merge_stats"]
output: "stats.json"
container: containers["vtools"]
shell: "python {input.mpy} --collectstats {input.cols} "
"> {output}"
rule stats_tsv:
"""Convert stats.json to tsv"""
input:
stats = rules.merge_stats.output,
sc = config["stats_to_tsv"]
output: "stats.tsv"
container: containers["python3"]
shell: "python {input.sc} -i {input.stats} > {output}"
rule multiple_metrics:
"""Run picard CollectMultipleMetrics"""
input:
@@ -480,7 +460,6 @@ rule multiqc:
Depends on stats.tsv to forcefully run at end of pipeline
"""
input:
stats = rules.stats_tsv.output,
bam = expand("{sample}/bams/{sample}.bam", sample=config["samples"]),
metric = expand("{sample}/bams/{sample}.metrics",
sample=config["samples"]),
@@ -492,7 +471,36 @@ rule multiqc:
"{sample}/bams/{sample}.insert_size_metrics",
sample=config["samples"]
),
output: "multiqc_report/multiqc_report.html"
fastqc_raw = (f"{sample}/pre_process/raw-{sample}-{read_group}/"
for read_group, sample in get_readgroup_per_sample()),
fastqc_trim = (f"{sample}/pre_process/trimmed-{sample}-{read_group}/"
for read_group, sample in get_readgroup_per_sample())
output:
html = "multiqc_report/multiqc_report.html",
insert = "multiqc_report/multiqc_data/multiqc_picard_insertSize.json"
container: containers["multiqc"]
shell: "multiqc --data-format json --force --outdir multiqc_report . "
"|| touch {output}"
rule merge_stats:
"""Merge all stats of all samples"""
input:
cols = expand("{sample}/{sample}.stats.json",
sample=config['samples']),
mpy = config["merge_stats"],
insertSize = rules.multiqc.output.insert
output: "stats.json"
container: containers["vtools"]
shell: "python {input.mpy} --collectstats {input.cols} "
"--picard-insertSize {input.insertSize} > {output}"
rule stats_tsv:
"""Convert stats.json to tsv"""
input:
stats = rules.merge_stats.output,
sc = config["stats_to_tsv"]
output: "stats.tsv"
container: containers["python3"]
shell: "python {input.sc} -i {input.stats} > {output}"
@@ -31,6 +31,20 @@ def parse_json(path):
return json.load(handle)
def add_picard_insertSize(data, filename):
""" Add the picard insertSize for each sample to data """
insert = parse_json(filename)
for sample in insert.values():
name = sample.pop('SAMPLE_NAME')
for d in data['sample_stats']:
if d['sample_name'] == name:
d['picard_insertSize'] = sample
break
else:
raise RuntimeError(f"Unknown sample {name}")
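# Hedged sketch of the shapes this function assumes (values are illustrative;
# only the key names checked by the functional test in this commit are
# confirmed, and the top-level keys are whatever MultiQC uses per sample):
#
#   data = {"sample_stats": [{"sample_name": "micro"}]}
#   insert = {"micro": {"SAMPLE_NAME": "micro",
#                       "MEDIAN_INSERT_SIZE": 317,
#                       "WIDTH_OF_99_PERCENT": 715}}
#
# After add_picard_insertSize(data, <path to that JSON>), the entry whose
# 'sample_name' is "micro" gains a 'picard_insertSize' dict with the metrics
# above, minus the SAMPLE_NAME key that was popped for matching.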
def main(collectstats):
data = dict()
data["sample_stats"] = list()
@@ -38,6 +52,9 @@ def main(collectstats):
for stats in collectstats:
cs = parse_json(stats)
data["sample_stats"].append(cs)
if args.picard_insertSize:
add_picard_insertSize(data, args.picard_insertSize)
print(json.dumps(data))
@@ -48,5 +65,9 @@ if __name__ == "__main__":
nargs='+',
required=True,
help='Path to the collected stats for each sample')
parser.add_argument('--picard-insertSize',
required=False,
help=('Path to multiQC json summary for picard '
'insertSize'))
args = parser.parse_args()
main(args.collectstats)
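# Hedged usage sketch. The merge script's filename comes from
# config["merge_stats"] and is not shown in this diff, so "merge_stats.py"
# below is a placeholder; the arguments match the shell command of the
# merge_stats rule above:
#
#   python merge_stats.py \
#       --collectstats micro/micro.stats.json \
#       --picard-insertSize multiqc_report/multiqc_data/multiqc_picard_insertSize.json \
#       > stats.json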
@@ -56,6 +56,10 @@
- '"SAMPLE_NAME": "micro",'
must_not_contain:
- '"SAMPLE_NAME": "micro.markdup",'
- path: "stats.json"
contains:
- "MEDIAN_INSERT_SIZE"
- "WIDTH_OF_99_PERCENT"