Commit 5073525b authored by Sander Bollen's avatar Sander Bollen
Browse files

also scatter genotyping step

parent 195aba10
......@@ -18,9 +18,6 @@ FEMALE_THRESHOLD = config.get("FEMALE_THRESHOLD", 0.6)
_this_dir = workflow.current_basedir
GSC = join(join(_this_dir, "src"), "gc.sc")
CGSC = join(join(_this_dir, "src"), "cg.sc")
env_dir = join(_this_dir, "envs")
main_env = join(_this_dir, "environment.yml")
......@@ -182,14 +179,6 @@ rule printreads:
"-o {output.bam} -R {input.ref} -BQSR {input.grp}"
# rule qdir:
# params:
# qdir=out_path("{sample}/.qdir")
# output:
# aux=out_path("{sample}/.qdir/.aux")
# shell: "touch {output.aux}"
rule gvcf_scatter:
input:
bam=out_path("{sample}/bams/{sample}.baserecal.bam"),
......@@ -208,49 +197,52 @@ rule gvcf_scatter:
rule gvcf_gather:
input:
gvcfs=expand(out_path("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz"), chunk=CHUNKS),
gvcfs=expand(out_path("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz"),
chunk=CHUNKS),
ref=REFERENCE,
gatk=GATK
params:
gvcfs=" -V ".join(expand(out_path("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz"), chunk=CHUNKS))
gvcfs=" -V ".join(expand(out_path("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz"),
chunk=CHUNKS))
output:
gvcf=out_path("{sample}/vcf/{sample}.g.vcf.gz")
conda: "envs/gatk.yaml"
shell: "java -cp {input.gatk} org.broadinstitute.gatk.tools.CatVariants "\
"-R {input.ref} -V {params.gvcfs} -output {output.gvcf} -assumeSorted"
# rule gvcf:
# input:
# bam=out_path("{sample}/bams/{sample}.baserecal.bam"),
# queue=QUEUE,
# dbsnp=DBSNP,
# ref=REFERENCE,
# gc=GSC,
# qaux=out_path("{sample}/.qdir/.aux")
# params:
# qdir=out_path("{sample}/.qdir")
# output:
# gvcf=out_path("{sample}/vcf/{sample}.g.vcf.gz")
# conda: "envs/gatk.yml"
# shell: "java -jar {input.queue} -S {input.gc} -I {input.bam} "\
# "-D {input.dbsnp} -R {input.ref} -o {output.gvcf} "\
# "-jobResReq 'h_vmem=10G' -run -qsub -jobParaEnv BWA "\
# "-runDir {params.qdir}"
rule genotype:
"-R {input.ref} -V {params.gvcfs} -output {output.gvcf} "\
"-assumeSorted"
rule genotype_scatter:
input:
gvcfs=expand(out_path("{sample}/vcf/{sample}.g.vcf.gz"), sample=SAMPLES),
queue=QUEUE,
gvcfs = expand(out_path("{sample}/vcf/{sample}.g.vcf.gz"),
sample=SAMPLES),
ref=REFERENCE,
cg=CGSC
gatk=GATK
params:
li=" -V ".join(expand(out_path("{sample}/vcf/{sample}.g.vcf.gz"),
sample=SAMPLES))
chunk="{chunk}"
output:
vcf=out_path("multisample/genotype.{chunk}.part.vcf.gz")
conda: "envs/gatk.yaml"
shell: "java -jar {input.gatk} -T GenotypeGVCFs -R {input.ref} "\
"-V {params.li} -L {params.chunk} -o {output.vcf}"
rule genotype_gather:
input:
vcfs=expand(out_path("multisample/genotype.{chunk}.part.vcf.gz"),
chunk=CHUNKS),
ref=REFERENCE,
gatk=GATK
params:
li=" -I ".join(expand(out_path("{sample}/vcf/{sample}.g.vcf.gz"), sample=SAMPLES))
vcfs=" -V ".join(expand(out_path("multisample/genotype.{chunk}.part.vcf.gz"),
chunk=CHUNKS))
output:
combined=out_path("multisample/genotyped.vcf.gz")
conda: "envs/gatk.yml"
shell: "java -jar {input.queue} -S {input.cg} -I {params.li} "\
"-R {input.ref} -o {output.combined} "\
"-jobResReq 'h_vmem=10G' -run -qsub -jobParaEnv BWA"
shell: "java -cp {input.gatk} org.broadinstitute.gatk.tool.CatVariants "\
"-R {input.ref} -V {params.vcfs} -output {output.combined} "\
"-assumeSorted"
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.queue.extensions.gatk._
import org.broadinstitute.gatk.queue.extensions.gatk.CombineGVCFs
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.ReferenceConfidenceMode
import org.broadinstitute.gatk.utils.commandline.Output
import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType
class CombineOurGs extends QScript {
@Input(doc="The input bam file", shortName = "I")
var bamFile: List[File] = Nil
@Input(doc="The reference", shortName="R")
var referenceFile: File = _
@Output(doc="output", shortName="O")
var outFile: File = _
def script(): Unit = {
val genotyper = new CombineGVCFs
genotyper.scatterCount = 200
genotyper.variant = this.bamFile
genotyper.reference_sequence = this.referenceFile
genotyper.out = outFile
add(genotyper)
}
}
\ No newline at end of file
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.queue.extensions.gatk._
import org.broadinstitute.gatk.queue.extensions.gatk.HaplotypeCaller
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.ReferenceConfidenceMode
import org.broadinstitute.gatk.utils.commandline.Output
import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType
class GvcfWholeGenome extends QScript {
@Input(doc="The input bam file", shortName = "I")
var bamFile: List[File] = Nil
@Input(doc="The reference", shortName="R")
var referenceFile: File = _
@Input(doc="dbSNP vcf", shortName="D")
var dbSNPFile: File = _
@Output(doc="output", shortName="O")
var outFile: File = _
def script(): Unit = {
val genotyper = new HaplotypeCaller
genotyper.scatterCount = 200
genotyper.input_file = this.bamFile
genotyper.reference_sequence = this.referenceFile
genotyper.dbsnp = this.dbSNPFile
genotyper.out = outFile
genotyper.stand_call_conf = 30
genotyper.emitRefConfidence = ReferenceConfidenceMode.GVCF
genotyper.variant_index_type = GATKVCFIndexType.LINEAR
genotyper.variant_index_parameter = 128000
add(genotyper)
}
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment