Skip to content
Snippets Groups Projects
Commit 4795938f authored by Cats's avatar Cats
Browse files

Merge branch 'master' into STAR_limitBAMsortRAM

parents 0c6f1f23 2aff5d1d
No related branches found
No related tags found
1 merge request: !14 "add limitBAMsortRAM to STAR and make sampleConfig write to file"
MIT License
Copyright (c) 2018 Peter van 't Hof
Copyright (c) Sequencing Analysis Support Core - Leiden University Medical Center
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
......
......@@ -3,6 +3,7 @@ task BwaMem {
File inputR1
File? inputR2
String referenceFasta
Array[File] indexFiles # These indexFiles need to be added, otherwise cromwell will not find them.
String outputPath
String? readgroup
......@@ -26,3 +27,39 @@ task BwaMem {
memory: if defined(memory) then memory else 8
}
}
# Builds a BWA index for a fasta file by running `bwa index`.
#
# The fasta is symlinked to `outputDir/<basename>` (or to `<basename>` in the
# working directory when outputDir is not given) and indexed there, so the
# index files end up next to the link.
#
# Inputs:
#   fasta                 - fasta file to be indexed
#   preCommand            - optional shell snippet run before indexing
#   constructionAlgorithm - passed to `bwa index -a`
#   blockSize             - passed to `bwa index -b`
#   outputDir             - directory in which the index is created
# Outputs:
#   indexedFasta - path of the linked fasta that was indexed
#   indexFiles   - the .bwt, .pac, .sa, .amb and .ann files next to it
task index {
    File fasta
    String? preCommand
    String? constructionAlgorithm
    Int? blockSize
    String? outputDir
    String fastaFilename = basename(fasta)

    command {
        set -e -o pipefail
        ${"mkdir -p " + outputDir}
        ${preCommand}
        if [[ ! ${fasta} -ef ${outputDir + "/"}${fastaFilename} ]]
        then
            ln -sf ${fasta} ${outputDir + "/"}${fastaFilename}
        fi
        bwa index \
        ${"-a " + constructionAlgorithm} \
        ${"-b " + blockSize} \
        ${outputDir + "/"}${fastaFilename}
    }

    # NOTE on the link step above: the fasta is linked unless the target
    # already is that very file. Previously the link was only made when
    # outputDir was set, which left `bwa index` pointing at a bare basename
    # that did not exist in the working directory.

    output {
        File indexBase = if (defined(outputDir)) then select_first([outputDir]) + "/" + fastaFilename else fastaFilename
        File indexedFasta = indexBase
        Array[File] indexFiles = [indexBase + ".bwt",indexBase + ".pac",indexBase + ".sa",indexBase + ".amb",indexBase + ".ann"]
    }

    parameter_meta {
        fasta: "Fasta file to be indexed"
        constructionAlgorithm: "-a STR BWT construction algorithm: bwtsw, is or rb2 [auto]"
        blockSize: "-b INT block size for the bwtsw algorithm (effective with -a bwtsw) [10000000]"
        outputDir: "index will be created in this output directory"
    }
}
......@@ -24,10 +24,7 @@ task download {
command {
set -e -o pipefail
${preCommand}
${'if [ ! -f ' + seqTaxMapPath +
' ]; then mkdir -p ' + seqTaxMapPath +
'; rm -d ' + seqTaxMapPath +
'; fi' }
${"mkdir -p $(dirname " + seqTaxMapPath + ")"}
${executable} \
-o ${libraryPath} \
${true='-d ' false='' defined(domain)}${sep=',' domain} \
......@@ -92,8 +89,7 @@ task build {
command {
set -e -o pipefail
${preCommand}
mkdir -p ${centrifugeIndexBase}
rm -d ${centrifugeIndexBase}
${"mkdir -p $(dirname " + centrifugeIndexBase + ")"}
${centrifugeBuildExecutable} \
${true='--large-index' false='' largeIndex} \
${true='--noauto' false='' noAuto} \
......
......@@ -51,12 +51,13 @@ task concatenateTextFiles {
Array[File] fileList
String combinedFilePath
Boolean? unzip=false
Boolean? zip=false
command {
mkdir -p ${combinedFilePath}
rm -d ${combinedFilePath}
set -e -o pipefail
${"mkdir -p $(dirname " + combinedFilePath + ")"}
${true='zcat' false= 'cat' unzip} ${sep=' ' fileList} \
> ${combinedFilePath}
${true="| gzip -c" false="" zip} > ${combinedFilePath}
}
output {
......
......@@ -6,13 +6,13 @@ task cutadapt {
String? format
String? preCommand
Int? cores = 1
String? memory = "4G"
Array[String]? adapter
Array[String]? front
Array[String]? anywhere
Array[String]? adapterRead2
Array[String]? frontRead2
Array[String]? anywhereRead2
Int? memory = 4
Array[String]+? adapter
Array[String]+? front
Array[String]+? anywhere
Array[String]+? adapterRead2
Array[String]+? frontRead2
Array[String]+? anywhereRead2
Boolean? interleaved
String? pairFilter
Float? errorRate
......@@ -59,7 +59,9 @@ task cutadapt {
command {
set -e -o pipefail
mkdir -p $(dirname ${read1output})
${"mkdir -p $(dirname " + read1output + ")"}
${"mkdir -p $(dirname " + read2output + ")"}
${"mkdir -p $(dirname " + reportPath + ")"}
${preCommand}
cutadapt \
${"--cores=" + cores} \
......@@ -120,4 +122,4 @@ task cutadapt {
cpu: select_first([cores])
memory: select_first([memory])
}
}
\ No newline at end of file
}
......@@ -15,7 +15,11 @@ task fastqc {
File? limits
Int? kmers
String? dir
# Chops off the .gz extension if present.
String name = sub(seqFile, "\\.gz$","")
# This regex chops off the extension and replaces it with _fastqc for the reportdir,
# just as fastqc does.
String reportDir = outdirPath + "/" + sub(basename(name), "\\.[^\\.]*$", "_fastqc")
command {
set -e -o pipefail
${preCommand}
......@@ -40,10 +44,10 @@ task fastqc {
}
output {
File rawReport = select_first(glob(outdirPath + "/*/fastqc_data.txt"))
File htmlReport = select_first(glob(outdirPath + "/*/fastqc_report.html"))
File summary = select_first(glob(outdirPath + "/*/summary.txt"))
Array[File] images = glob(outdirPath + "/*/Images/*.png")
File rawReport = reportDir + "/fastqc_data.txt"
File htmlReport = reportDir + "/fastqc_report.html"
File summary = reportDir + "/summary.txt"
Array[File] images = glob(reportDir + "/Images/*.png")
}
runtime {
......@@ -113,4 +117,4 @@ task getConfiguration {
runtime {
memory: 1
}
}
\ No newline at end of file
}
......@@ -65,3 +65,55 @@ task Flagstat {
File flagstat = outputPath
}
}
# Converts a BAM file to fastq with `samtools fastq`.
#
# outputRead1 is written with `-1` when outputRead2 is given and with `-s`
# otherwise. All other optional inputs map one-to-one onto samtools flags and
# are omitted from the command line when not defined.
#
# Outputs:
#   read1 - file at outputRead1
#   read2 - file at outputRead2 (optional)
#   read0 - file at outputRead0 (optional)
task fastq {
    String? preCommand
    File inputBam
    String outputRead1
    String? outputRead2
    String? outputRead0
    Int? includeFilter
    Int? excludeFilter
    Int? excludeSpecificFilter
    Boolean? appendReadNumber
    Boolean? outputQuality
    Int? compressionLevel
    Int? threads
    Int? memory
    # Thread count used for both the samtools call and the runtime request.
    Int totalThreads = select_first([threads, 1])

    # The command fails fast (set -e -o pipefail) and creates the parent
    # directories of every requested output, like the other tasks in this
    # repository, so a failing preCommand or a missing output directory is
    # not silently ignored.
    command {
        set -e -o pipefail
        ${"mkdir -p $(dirname " + outputRead1 + ")"}
        ${"mkdir -p $(dirname " + outputRead2 + ")"}
        ${"mkdir -p $(dirname " + outputRead0 + ")"}
        ${preCommand}
        samtools fastq \
        ${true="-1" false="-s" defined(outputRead2)} ${outputRead1} \
        ${"-2 " + outputRead2} \
        ${"-0 " + outputRead0} \
        ${"-f " + includeFilter} \
        ${"-F " + excludeFilter} \
        ${"-G " + excludeSpecificFilter} \
        ${true="-N" false="-n" appendReadNumber} \
        ${true="-O" false="" outputQuality} \
        ${"-c " + compressionLevel} \
        ${"--threads " + totalThreads} \
        ${inputBam}
    }

    output {
        File read1 = outputRead1
        File? read2 = outputRead2
        File? read0 = outputRead0
    }

    runtime {
        cpu: totalThreads
        memory: select_first([memory, 1])
    }

    parameter_meta {
        preCommand: "A command that is run before the task. Can be used to activate environments"
        inputBam: "The bam file to process."
        outputRead1: "If only outputRead1 is given '-s' flag is assumed. Else '-1'."
        includeFilter: "Include reads with ALL of these flags. Corresponds to '-f'"
        excludeFilter: "Exclude reads with ONE OR MORE of these flags. Corresponds to '-F'"
        excludeSpecificFilter: "Exclude reads with ALL of these flags. Corresponds to '-G'"
        appendReadNumber: "Append /1 and /2 to the read name, or don't. Corresponds to '-n/N'"
    }
}
# Subsamples a sequence file with `seqtk sample`.
#
# The result is redirected to outFilePath, piped through gzip first when
# `zip` is true. `number` and `fraction` are both optional and are placed,
# in that order, after the input file on the seqtk command line.
task sample {
    # Input and output locations.
    File sequenceFile
    String? outFilePath = "subsampledReads.fq.gz"

    # Sampling controls.
    Int? number
    Float? fraction
    Int? seed
    Boolean? twoPassMode

    # Miscellaneous.
    Boolean? zip = true
    String? preCommand

    command {
        set -e -o pipefail
        ${'mkdir -p $(dirname ' + outFilePath + ')'}
        ${preCommand}
        seqtk sample \
        ${"-s " + seed} \
        ${true="-2 " false="" twoPassMode} \
        ${sequenceFile} \
        ${number} ${fraction} \
        ${true="| gzip" false="" zip} \
        ${"> " + outFilePath}
    }

    output {
        File subsampledReads = select_first([outFilePath])
    }
}
\ No newline at end of file
# Runs the SPAdes assembler (`spades.py`) and collects its standard outputs
# from outputDir.
#
# read1 is passed with `-1` when read2 is defined and with `-s` otherwise.
# Every other optional input maps onto the spades.py flag shown in the
# command and is omitted when not defined.
#
# Resources: `threads` defaults to 1 and `memoryGb` defaults to 16 per thread.
# spades.py receives the total (`--memory`), while the runtime block requests
# totalMemory / finalThreads (integer division).
task spades {
    String outputDir
    String? preCommand
    File read1
    File? read2
    File? interlacedReads
    File? sangerReads
    File? pacbioReads
    File? nanoporeReads
    File? tslrContigs
    File? trustedContigs
    File? untrustedContigs
    Boolean? singleCell
    Boolean? metagenomic
    Boolean? rna
    Boolean? plasmid
    Boolean? ionTorrent
    Boolean? onlyErrorCorrection
    Boolean? onlyAssembler
    Boolean? careful
    Boolean? disableGzipOutput
    Boolean? disableRepeatResolution
    File? dataset
    Int? threads
    Int? memoryGb
    # Previously declared but never used; now forwarded as --tmp-dir.
    File? tmpDir
    String? k
    Float? covCutoff
    Int? phredOffset
    Int finalThreads = select_first([threads,1])
    Int totalMemory = select_first([memoryGb, finalThreads * 16])
    Int clusterMemory = totalMemory / finalThreads

    command {
        set -e -o pipefail
        ${preCommand}
        spades.py \
        ${"-o " + outputDir} \
        ${true="--sc" false="" singleCell} \
        ${true="--meta" false="" metagenomic} \
        ${true="--rna" false="" rna} \
        ${true="--plasmid" false="" plasmid} \
        ${true="--iontorrent" false="" ionTorrent} \
        ${"--12 " + interlacedReads} \
        ${true="-1" false="-s" defined(read2)} ${read1} \
        ${"-2 " + read2} \
        ${"--sanger " + sangerReads} \
        ${"--pacbio " + pacbioReads} \
        ${"--nanopore " + nanoporeReads} \
        ${"--tslr " + tslrContigs} \
        ${"--trusted-contigs " + trustedContigs} \
        ${"--untrusted-contigs " + untrustedContigs} \
        ${true="--only-error-correction" false="" onlyErrorCorrection} \
        ${true="--only-assembler" false="" onlyAssembler} \
        ${true="--careful" false="" careful} \
        ${true="--disable-gzip-output" false="" disableGzipOutput} \
        ${true="--disable-rr" false="" disableRepeatResolution} \
        ${"--dataset " + dataset} \
        ${"--threads " + finalThreads} \
        ${"--memory " + totalMemory} \
        ${"--tmp-dir " + tmpDir} \
        ${"-k " + k} \
        ${"--cov-cutoff " + covCutoff} \
        ${"--phred-offset " + phredOffset}
    }

    output {
        Array[File] correctedReads = glob(outputDir + "/corrected/*.fastq*")
        File scaffolds = outputDir + "/scaffolds.fasta"
        File contigs = outputDir + "/contigs.fasta"
        File assemblyGraphWithScaffoldsGfa = outputDir + "/assembly_graph_with_scaffolds.gfa"
        File assemblyGraphFastg = outputDir + "/assembly_graph.fastg"
        File contigsPaths = outputDir + "/contigs.paths"
        File scaffoldsPaths = outputDir + "/scaffolds.paths"
        File params = outputDir + "/params.txt"
        File log = outputDir + "/spades.log"
    }

    runtime {
        cpu: finalThreads
        memory: clusterMemory
    }
}
\ No newline at end of file
# Runs the Unicycler assembler and collects the assembly (fasta and gfa) and
# the log from the `out` directory.
#
# Every optional input maps onto the unicycler flag shown in the command and
# is omitted when not defined. `threads` defaults to 1 and `memory` to 4;
# both values are used for the runtime request, and threads is also passed
# to unicycler.
task unicycler {
    String? preCommand
    File? short1
    File? short2
    File? unpaired
    File? long
    String out
    Int? verbosity
    Int? minFastaLength
    Int? keep
    Boolean? vcf
    Int? threads
    Int? memory
    # select_first takes a single array argument; the defaults go last.
    Int finalThreads = select_first([threads, 1])
    Int finalMemory = select_first([memory, 4])
    String? mode
    Float? minBridgeQual
    Int? linearSeqs
    File? spadesPath
    Boolean? noCorrect
    Float? minKmerFrac
    Float? maxKmerFrac
    Int? kmerCount
    Float? depthFilter
    Boolean? noMiniasm
    File? raconPath
    File? existingLongReadAssembly
    Boolean? noRotate
    File? startGenes
    Float? startGeneId
    Float? startGeneCov
    String? makeblastdbPath
    File? tblastnPath
    Boolean? noPilon
    File? bowtie2Path
    File? bowtie2buildPath
    File? samtoolsPath
    File? pilonPath
    File? javaPath
    Int? minPolishSize
    File? bcftoolsPath
    Int? minComponentSize
    Int? minDeadEndSize
    File? contamination
    String? scores
    String? lowScore

    # `verbosity` was declared but never forwarded; it is now passed as
    # --verbosity when defined.
    command {
        set -e -o pipefail
        mkdir -p ${out}
        ${preCommand}
        unicycler \
        ${"--short1 " + short1} \
        ${"--short2 " + short2} \
        ${"--unpaired " + unpaired} \
        ${"--long " + long} \
        --out ${out} \
        ${"--verbosity " + verbosity} \
        ${"--min_fasta_length " + minFastaLength} \
        ${"--keep " + keep} \
        ${true="--vcf" false="" vcf} \
        ${"--threads " + finalThreads} \
        ${"--mode " + mode} \
        ${"--min_bridge_qual " + minBridgeQual} \
        ${"--linear_seqs " + linearSeqs} \
        ${"--spades_path " + spadesPath} \
        ${true="--no_correct" false="" noCorrect} \
        ${"--min_kmer_frac " + minKmerFrac} \
        ${"--max_kmer_frac " + maxKmerFrac} \
        ${"--kmer_count " + kmerCount} \
        ${"--depth_filter " + depthFilter} \
        ${true="--no_miniasm" false="" noMiniasm} \
        ${"--racon_path " + raconPath} \
        ${"--existing_long_read_assembly " + existingLongReadAssembly} \
        ${true="--no_rotate" false="" noRotate} \
        ${"--start_genes " + startGenes} \
        ${"--start_gene_id " + startGeneId} \
        ${"--start_gene_cov " + startGeneCov} \
        ${"--makeblastdb_path " + makeblastdbPath} \
        ${"--tblastn_path " + tblastnPath} \
        ${true="--no_pilon" false="" noPilon} \
        ${"--bowtie2_path " + bowtie2Path} \
        ${"--bowtie2_build_path " + bowtie2buildPath} \
        ${"--samtools_path " + samtoolsPath} \
        ${"--pilon_path " + pilonPath} \
        ${"--java_path " + javaPath} \
        ${"--min_polish_size " + minPolishSize} \
        ${"--bcftools_path " + bcftoolsPath} \
        ${"--min_component_size " + minComponentSize} \
        ${"--min_dead_end_size " + minDeadEndSize} \
        ${"--contamination " + contamination} \
        ${"--scores " + scores} \
        ${"--low_score " + lowScore}
    }

    output {
        File assemblyFasta = out + "/assembly.fasta"
        File assemblyGfa = out + "/assembly.gfa"
        File log = out + "/unicycler.log"
    }

    runtime {
        cpu: finalThreads
        memory: finalMemory
    }
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment