From 3fa0f1411831448f15e17506dfef9230b303a5f1 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg <RedmarvandenBerg@lumc.nl> Date: Thu, 15 Oct 2020 15:38:45 +0200 Subject: [PATCH] Remove most inputs --- whatshap.wdl | 110 ++++++++------------------------------------------- 1 file changed, 16 insertions(+), 94 deletions(-) diff --git a/whatshap.wdl b/whatshap.wdl index 2506aa1..1334d45 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -26,33 +26,13 @@ task Phase { String outputVCF File? reference File? referenceIndex - Boolean? no_reference String? tag - File? output_read_list String? algorithm - Boolean? merge_reads - String? internal_downsampling - String? mapping_quality Boolean? indels - Boolean? ignore_read_groups String? sample String? chromosome - String? error_rate - String? maximum_error_rate String? threshold - String? negative_threshold - Boolean? full_genotyping - Boolean? distrust_genotypes - Boolean? include_homozygous - String? default_gq - String? gl_regularize_r - File? changed_genotype_list String? ped - File? recombination_list - String? recomb_rate - File? gen_map - Boolean? no_genetic_haplo_typing - Boolean? use_ped_samples File vcf File vcfIndex File phaseInput @@ -70,33 +50,13 @@ task Phase { ~{phaseInput} \ ~{if defined(outputVCF) then ("--output " + '"' + outputVCF + '"') else ""} \ ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ - ~{true="--no-reference" false="" no_reference} \ ~{if defined(tag) then ("--tag " + '"' + tag + '"') else ""} \ - ~{if defined(output_read_list) then ("--output-read-list " + '"' + output_read_list + '"') else ""} \ ~{if defined(algorithm) then ("--algorithm " + '"' + algorithm + '"') else ""} \ - ~{true="--merge-reads" false="" merge_reads} \ - ~{if defined(internal_downsampling) then ("--internal-downsampling " + '"' + internal_downsampling + '"') else ""} \ - ~{if defined(mapping_quality) then ("--mapping-quality " + '"' + mapping_quality + '"') else ""} \ ~{true="--indels" false="" indels} \ - ~{true="--ignore-read-groups" false="" ignore_read_groups} \ ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ ~{if defined(chromosome) then ("--chromosome " + '"' + chromosome + '"') else ""} \ - ~{if defined(error_rate) then ("--error-rate " + '"' + error_rate + '"') else ""} \ - ~{if defined(maximum_error_rate) then ("--maximum-error-rate " + '"' + maximum_error_rate + '"') else ""} \ ~{if defined(threshold) then ("--threshold " + '"' + threshold + '"') else ""} \ - ~{if defined(negative_threshold) then ("--negative-threshold " + '"' + negative_threshold + '"') else ""} \ - ~{true="--full-genotyping" false="" full_genotyping} \ - ~{true="--distrust-genotypes" false="" distrust_genotypes} \ - ~{true="--include-homozygous" false="" include_homozygous} \ - ~{if defined(default_gq) then ("--default-gq " + '"' + default_gq + '"') else ""} \ - ~{if defined(gl_regularize_r) then ("--gl-regularizer " + '"' + gl_regularize_r + '"') else ""} \ - ~{if defined(changed_genotype_list) then ("--changed-genotype-list " + '"' + changed_genotype_list + '"') else ""} \ ~{if defined(ped) then ("--ped " + '"' + ped + '"') else ""} \ - ~{if defined(recombination_list) then ("--recombination-list " + '"' + recombination_list + '"') else ""} \ - ~{if defined(recomb_rate) then ("--recombrate " + '"' + recomb_rate + '"') else ""} \ - ~{if defined(gen_map) then ("--genmap " + '"' + gen_map + '"') else ""} \ - ~{true="--no-genetic-haplotyping" false="" no_genetic_haplo_typing} \ - ~{true="--use-ped-samples" false="" use_ped_samples} && \ tabix -p vcf ~{outputVCF} } @@ -114,33 +74,13 @@ task Phase { parameter_meta { outputVCF: {description: "Output VCF file. Add .gz to the file name to get compressed output. If omitted, use standard output.", category: "common"} reference: {description: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created", category: "common"} - no_reference: {description: "Detect alleles without requiring a reference, at the expense of phasing quality (in particular for long reads)", category: "common"} tag: {description: "Store phasing information with PS tag (standardized) or HP tag (used by GATK ReadBackedPhasing) (default: {description: PS)", category: "common"} - output_read_list: {description: "Write reads that have been used for phasing to FILE.", category: "advanced"} algorithm: {description: "Phasing algorithm to use (default: {description: whatshap)", category: "advanced"} - merge_reads: {description: "Merge reads which are likely to come from the same haplotype (default: {description: do not merge reads)", category: "common"} - internal_downsampling: {description: "Coverage reduction parameter in the internal core phasing algorithm. Higher values increase runtime *exponentially* while possibly improving phasing quality marginally. Avoid using this in the normal case! (default: {description: 15)", category: "advanced"} - mapping_quality: {description: "Minimum mapping quality (default: {description: 20)", category: "common"} indels: {description: "Also phase indels (default: {description: do not phase indels)", category: "common"} - ignore_read_groups: {description: "Ignore read groups in BAM/CRAM header and assume all reads come from the same sample.", category: "advanced"} sample: {description: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times.", category: "common"} chromosome: {description: "Name of chromosome to phase. If not given, all chromosomes in the input VCF are phased. Can be used multiple times.", category: "common"} - error_rate: {description: "The probability that a nucleotide is wrong in read merging model (default: {description: 0.15).", category: "advanced"} - maximum_error_rate: {description: "The maximum error rate of any edge of the read merging graph before discarding it (default: {description: 0.25).", category: "advanced"} threshold: {description: "The threshold of the ratio between the probabilities that a pair of reads come from the same haplotype and different haplotypes in the read merging model (default: {description: 1000000).", category: "advanced"} - negative_threshold: {description: "The threshold of the ratio between the probabilities that a pair of reads come from different haplotypes and the same haplotype in the read merging model (default: {description: 1000).", category: "advanced"} - full_genotyping: {description: "Completely re-genotype all variants based on read data, ignores all genotype data that might be present in the VCF (EXPERIMENTAL FEATURE).", category: "experimental"} - distrust_genotypes: {description: "Allow switching variants from hetero- to homozygous in an optimal solution (see documentation).", category: "advanced"} - include_homozygous: {description: "Also work on homozygous variants, which might be turned to heterozygous", category: "advanced"} - default_gq: {description: "Default genotype quality used as cost of changing a genotype when no genotype likelihoods are available (default 30)", category: "advanced"} - gl_regularize_r: {description: "Constant (float) to be used to regularize genotype likelihoods read from input VCF (default None).", category: "advanced"} - changed_genotype_list: {description: "Write list of changed genotypes to FILE.", category: "advanced"} ped: {description: "Use pedigree information in PED file to improve phasing (switches to PedMEC algorithm). Columns 2, 3, 4 must refer to child, mother, and father sample names as used in the VCF and BAM/CRAM. Other columns are ignored.", category: "advanced"} - recombination_list: {description: "Write putative recombination events to FILE.", category: "advanced"} - recomb_rate: {description: "Recombination rate in cM/Mb (used with --ped). If given, a constant recombination rate is assumed (default: {description: 1.26cM/Mb).", category: "advanced"} - gen_map: {description: "File with genetic map (used with --ped) to be used instead of constant recombination rate, i.e. overrides option --recombrate.", category: "advanced"} - no_genetic_haplo_typing: {description: "Do not merge blocks that are not connected by reads (i.e. solely based on genotype status). Default: {description: when in --ped mode, merge all blocks that contain at least one homozygous genotype in at least one individual into one block.", category: "advanced"} - use_ped_samples: {description: "Only work on samples mentioned in the provided PED file.", category: "advanced"} vcf: {description: "VCF or BCF file with variants to be phased (can be gzip-compressed)", category: "required"} vcfIndex: {description: "Index for the VCF or BCF file with variants to be phased", category: "required"} phaseInput: {description: "BAM, CRAM, VCF or BCF file(s) with phase information, either through sequencing reads (BAM, CRAM) or through phased blocks (VCF, BCF)", category: "required"} @@ -154,10 +94,8 @@ task Stats { input { String? gtf String? sample - String? chr_lengths String? tsv - Boolean? only_sn_vs - String? block_list + String? blockList String? chromosome File vcf @@ -168,21 +106,19 @@ task Stats { } command { - whatshap stats \ + whatshap stats \ ~{vcf} \ ~{if defined(gtf) then ("--gtf " + '"' + gtf + '"') else ""} \ ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ - ~{if defined(chr_lengths) then ("--chr-lengths " + '"' + chr_lengths + '"') else ""} \ ~{if defined(tsv) then ("--tsv " + '"' + tsv + '"') else ""} \ - ~{true="--only-snvs" false="" only_sn_vs} \ - ~{if defined(block_list) then ("--block-list " + '"' + block_list + '"') else ""} \ + ~{if defined(blockList) then ("--block-list " + '"' + blockList + '"') else ""} \ ~{if defined(chromosome) then ("--chromosome " + '"' + chromosome + '"') else ""} } output { - File? phasedGTF = gtf - File? phasedTSV = tsv - File? phasedBlockList = block_list + File? phasedGTF = gtf + File? phasedTSV = tsv + File? phasedBlockList = blockList } runtime { @@ -194,10 +130,8 @@ task Stats { parameter_meta { gtf: "Write phased blocks to GTF file." sample: "Name of the sample to process. If not given, use first sample found in VCF." - chr_lengths: "File with chromosome lengths (one line per chromosome, tab separated '<chr> <length>') needed to compute N50 values." tsv: "Filename to write statistics to (tab-separated)." - only_sn_vs: "Only process SNVs and ignore all other variants." - block_list: "Filename to write list of all blocks to (one block per line)." + blockList: "Filename to write list of all blocks to (one block per line)." chromosome: "Name of chromosome to process. If not given, all chromosomes in the input VCF are considered." vcf: "Phased VCF file" memory: {description: "The amount of memory this job will use.", category: "advanced"} @@ -212,12 +146,7 @@ task Haplotag { File? reference File? referenceFastaIndex String? regions - Boolean? ignore_linked_read - String? linked_read_distance_cut_off - Boolean? ignore_read_groups String? sample - String? output_haplo_tag_list - Boolean? tag_supplementary File vcf File vcfIndex File alignments @@ -230,24 +159,19 @@ task Haplotag { } command { - whatshap haplotag \ + whatshap haplotag \ ~{vcf} \ ~{alignments} \ ~{if defined(outputFile) then ("--output " + '"' + outputFile+ '"') else ""} \ ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ ~{if defined(regions) then ("--regions " + '"' + regions + '"') else ""} \ - ~{true="--ignore-linked-read" false="" ignore_linked_read} \ - ~{if defined(linked_read_distance_cut_off) then ("--linked-read-distance-cutoff " + '"' + linked_read_distance_cut_off + '"') else ""} \ - ~{true="--ignore-read-groups" false="" ignore_read_groups} \ ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ - ~{if defined(output_haplo_tag_list) then ("--output-haplotag-list " + '"' + output_haplo_tag_list + '"') else ""} \ - ~{true="--tag-supplementary" false="" tag_supplementary} && \ python3 -c "import pysam; pysam.index('~{outputFile}')" } output { - File bam = outputFile - File bamIndex = outputFile + ".bai" + File bam = outputFile + File bamIndex = outputFile + ".bai" } runtime { @@ -258,16 +182,14 @@ task Haplotag { parameter_meta { outputFile: "Output file. If omitted, use standard output." - reference: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created" + reference: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created." + referenceIndex: "Index for the reference file." regions: "Specify region(s) of interest to limit the tagging to reads/variants overlapping those regions. You can specify a space-separated list of regions in the form of chrom:start-end, chrom (consider entire chromosome), or chrom:start (consider region from this start to end of chromosome)." - ignore_linked_read: "Ignore linkage information stored in BX tags of the reads." - linked_read_distance_cut_off: "Assume reads with identical BX tags belong to different read clouds if their distance is larger than LINKEDREADDISTANCE (default: 50000)." - ignore_read_groups: "Ignore read groups in BAM/CRAM header and assume all reads come from the same sample." sample: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times." - output_haplo_tag_list: "Write assignments of read names to haplotypes (tab separated) to given output file. If filename ends in .gz, then output is gzipped." - tag_supplementary: "Also tag supplementary alignments. Supplementary alignments are assigned to the same haplotype the primary alignment has been assigned to (default: only tag primary alignments)." - vcf: "VCF file with phased variants (must be gzip-compressed and indexed)" - alignments: "File (BAM/CRAM) with read alignments to be tagged by haplotype" + vcf: "VCF file with phased variants (must be gzip-compressed and indexed)." + vcfIndex: "Index for the VCF or BCF file with variants to be phased." + alignments: "File (BAM/CRAM) with read alignments to be tagged by haplotype." + alignmentsIndex: "Index for the alignment file." memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} -- GitLab