diff --git a/README.md b/README.md index 02c9252d9be384b738037ab45dc2a7035f687b56..3214f99224c6ca96238db31a1aab047e5b1ec248 100644 --- a/README.md +++ b/README.md @@ -191,7 +191,7 @@ As a rough estimate of the total number of jobs in pipeline you can use the followig formula: ```math -jobs = 4+(22*n_{samples})+(1*n_{samples}*n_{beds})+(1*n_{samples}*n_{chunks})+(1*n_{chunks}) +jobs = 4+(21*n_{samples})+(1*n_{samples}*n_{beds})+(1*n_{samples}*n_{chunks})+(1*n_{chunks}) ``` This gives about 12,000 jobs for a 96-sample run with 2 bed files and 100 chunks. diff --git a/Snakefile b/Snakefile index f27e788bdefb8b1c5c3b57f3ee0f880e702a5208..822feb3e7414361b2e3d3662da0f8239bb48fbf0 100644 --- a/Snakefile +++ b/Snakefile @@ -208,7 +208,7 @@ rule markdup: params: tmp = out_path("tmp") output: - bam = temp(out_path("{sample}/bams/{sample}.markdup.bam")), + bam = out_path("{sample}/bams/{sample}.markdup.bam"), metrics = out_path("{sample}/bams/{sample}.markdup.metrics") conda: "envs/picard.yml" shell: "picard MarkDuplicates CREATE_INDEX=TRUE TMP_DIR={params.tmp} " \ @@ -235,24 +235,10 @@ rule baserecal: "{input.dbsnp} -knownSites {input.one1kg} " \ "-knownSites {input.hapmap}" -rule printreads: - input: - grp=out_path("{sample}/bams/{sample}.baserecal.grp"), - bam=out_path("{sample}/bams/{sample}.markdup.bam"), - java=JAVA, - gatk=GATK, - ref=REFERENCE - output: - bam=out_path("{sample}/bams/{sample}.baserecal.bam"), - bai=out_path("{sample}/bams/{sample}.baserecal.bai") - conda: "envs/gatk.yml" - shell: "{input.java} -jar {input.gatk} -T PrintReads -I {input.bam} "\ - "-o {output.bam} -R {input.ref} -BQSR {input.grp}" - - rule gvcf_scatter: input: - bam=out_path("{sample}/bams/{sample}.baserecal.bam"), + bam=out_path("{sample}/bams/{sample}.markdup.bam"), + bqsr=out_path("{sample}/bams/{sample}.baserecal.grp"), dbsnp=DBSNP, ref=REFERENCE, gatk=GATK @@ -264,7 +250,8 @@ rule gvcf_scatter: shell: "java -jar -Xmx4G {input.gatk} -T HaplotypeCaller -ERC GVCF -I "\ "{input.bam} -R {input.ref} -D {input.dbsnp} "\ "-L {params.chunk} -o {output.gvcf} "\ - "-variant_index_type LINEAR -variant_index_parameter 128000" + "-variant_index_type LINEAR -variant_index_parameter 128000 " \ + "-BQSR {input.bqsr}" rule gvcf_gather: