Skip to content
Snippets Groups Projects
Unverified Commit 5b7e752f authored by Cats's avatar Cats Committed by GitHub
Browse files

Merge branch 'develop' into BIOWDL-78

parents 6ac3ff86 dbee4846
No related branches found
No related tags found
1 merge request!74add SomaticSeq
CPAT.wdl 0 → 100644
version 1.0
task CPAT {
    # Runs `cpat.py` on `gene` and exposes the file written to `outFilePath`.
    input {
        # Optional shell snippet executed before cpat.py is invoked.
        String? preCommand
        File gene
        # Destination passed to --outfile; its parent directory is created first.
        String outFilePath
        String hex
        String logitModel
        # Passed to cpat.py as --ref when supplied.
        File? referenceGenome
        # Not referenced in the command itself. Supply it alongside
        # referenceGenome when CPAT should not index the reference genome.
        File? referenceGenomeIndex
        # Optional codon lists; each is joined with "," and passed after
        # --start / --stop respectively.
        Array[String]? startCodons
        Array[String]? stopCodons
    }

    # The codon options are assembled in two placeholders each: the first
    # emits the flag only when the array was supplied, the second emits the
    # comma-joined values. select_first turns the optional array into a
    # non-optional one (falling back to a single empty string) so that it can
    # be used with sep.
    command {
        set -e -o pipefail
        mkdir -p $(dirname ~{outFilePath})
        ~{preCommand}
        cpat.py \
        --gene ~{gene} \
        --outfile ~{outFilePath} \
        --hex ~{hex} \
        --logitModel ~{logitModel} \
        ~{"--ref " + referenceGenome} \
        ~{if defined(startCodons) then "--start" else ""} ~{sep="," select_first([startCodons, [""]])} \
        ~{if defined(stopCodons) then "--stop" else ""} ~{sep="," select_first([stopCodons, [""]])}
    }

    output {
        # The file cpat.py was told to write via --outfile.
        File outFile = outFilePath
    }
}
# There are also make_hexamer_tab.py and make_logitModel.py,
# which could be added as tasks here.
\ No newline at end of file
version 1.0
task GffCompare {
    # Runs `gffcompare` on one or more GTF files against a reference
    # annotation and collects the files it writes under the chosen prefix.
    input {
        # Optional shell snippet executed before gffcompare is invoked.
        String? preCommand
        # Optional file listing input GTFs (passed with -i); its line count
        # is used below to predict the name of the main output file.
        File? inputGtfList
        # Input GTFs passed as positional arguments.
        Array[File] inputGtfFiles
        File referenceAnnotation
        # When set, the directory is created and prepended to outPrefix.
        String? outputDir
        String outPrefix = "gffcmp" # gffcmp is the default used by the program as well. This needs to be
        # defined in order for the output values to be consistent and correct.
        File? genomeSequences
        Int? maxDistanceFreeEndsTerminalExons
        Int? maxDistanceGroupingTranscriptStartSites
        String? namePrefix
        # Each boolean below switches on the single-letter gffcompare flag
        # it is mapped to in the command section.
        Boolean C = false
        Boolean A = false
        Boolean X = false
        Boolean K = false
        Boolean snCorrection = false
        Boolean precisionCorrection = false
        Boolean discardSingleExonTransfragsAndReferenceTranscripts = false
        Boolean discardSingleExonReferenceTranscripts = false
        Boolean noTmap = false
        Boolean verbose = false
        Boolean debugMode = false
        # This workaround only works in the input section.
        # Issue addressed at https://github.com/openwdl/wdl/pull/263
        File? noneFile # This is a wdl workaround. Please do not assign!
    }

    # This allows for the creation of output directories.
    String dirPrefix= if defined(outputDir) then outputDir + "/" else ""
    String totalPrefix = dirPrefix + outPrefix

    parameter_meta {}

    command {
        set -e
        ~{preCommand}
        ~{"mkdir -p " + outputDir}
        gffcompare \
        -r ~{referenceAnnotation} \
        ~{"-o '" + totalPrefix + "'"} \
        ~{"-s " + genomeSequences} \
        ~{"-e " + maxDistanceFreeEndsTerminalExons} \
        ~{"-d " + maxDistanceGroupingTranscriptStartSites} \
        ~{"-p " + namePrefix} \
        ~{true="-C" false="" C} \
        ~{true="-A" false="" A} \
        ~{true="-X" false="" X} \
        ~{true="-K" false="" K} \
        ~{true="-R" false="" snCorrection} \
        ~{true="-Q" false="" precisionCorrection} \
        ~{true="-M" false="" discardSingleExonTransfragsAndReferenceTranscripts} \
        ~{true="-N" false="" discardSingleExonReferenceTranscripts} \
        ~{true="-T" false="" noTmap} \
        ~{true="-V" false="" verbose} \
        ~{true="-D" false="" debugMode} \
        ~{"-i " + inputGtfList} \
        ~{sep=" " inputGtfFiles}
    }

    # Output of gffcompare is not stable. It depends on the number of files in the input.
    # The total is the number of lines in inputGtfList (0 when absent) plus
    # the number of positional input files.
    Int noFilesGtfList = if defined(inputGtfList) then length(read_lines(select_first([inputGtfList]))) else 0
    Int noInputFiles = length(inputGtfFiles)
    Boolean oneFile = (noFilesGtfList + noInputFiles) == 1
    # Exactly one input file yields "<prefix>.annotated.gtf"; otherwise
    # "<prefix>.combined.gtf" is expected.
    String annotatedName = if oneFile then "annotated" else "combined"
    # Check if a redundant .gtf will be created
    Boolean createRedundant = C || A || X

    output {
        File annotated = totalPrefix + "." + annotatedName + ".gtf"
        File loci = totalPrefix + ".loci"
        File stats = totalPrefix + ".stats"
        File tracking = totalPrefix + ".tracking"
        # noneFile is not stable. Please replace this as soon as wdl spec allows
        # The unassigned optional input stands in for "no file" so that these
        # outputs evaluate to an undefined File? when they are not produced.
        File? redundant = if createRedundant then totalPrefix + ".redundant.gtf" else noneFile
        File? missedIntrons = if debugMode then totalPrefix + ".missed_introns.gtf" else noneFile
    }
}
\ No newline at end of file
version 1.0
task GffRead {
    # Runs `gffread` on a GFF file and a genomic sequence. Every output is
    # optional and is only requested when its destination path is supplied.
    input {
        # Optional shell snippet executed before gffread is invoked.
        String? preCommand
        File inputGff
        File genomicSequence
        File? genomicIndex # Optional. GFFRead can create this by itself.
        # Destination paths; each maps to one gffread option in the command
        # (-w, -x, -y and -o respectively).
        String? exonsFastaPath
        String? CDSFastaPath
        String? proteinFastaPath
        String? filteredGffPath
        # Adds -T to the gffread call when true.
        Boolean outputGtfFormat = false
    }

    # The mkdir lines below are a workaround. The natural form would be
    # ~{"mkdir -p $(dirname " + somePath + ")"}
    # but that goes wrong: Cromwell always emits the ')' even when somePath
    # is not defined, which crashes the script. The closing parenthesis is
    # therefore emitted by a second placeholder that is guarded by defined().
    command {
        set -e -o pipefail
        ~{preCommand}
        ~{"mkdir -p $(dirname " + CDSFastaPath}~{if defined(CDSFastaPath) then ")" else ""}
        ~{"mkdir -p $(dirname " + exonsFastaPath}~{if defined(exonsFastaPath) then ")" else ""}
        ~{"mkdir -p $(dirname " + proteinFastaPath}~{if defined(proteinFastaPath) then ")" else ""}
        ~{"mkdir -p $(dirname " + filteredGffPath}~{if defined(filteredGffPath) then ")" else ""}
        gffread \
        ~{inputGff} \
        -g ~{genomicSequence} \
        ~{"-w " + exonsFastaPath} \
        ~{"-x " + CDSFastaPath} \
        ~{"-y " + proteinFastaPath} \
        ~{"-o " + filteredGffPath} \
        ~{true="-T " false="" outputGtfFormat}
    }

    output {
        # Each output is defined only when the matching path input was given.
        File? exonsFasta = exonsFastaPath
        File? CDSFasta = CDSFastaPath
        File? proteinFasta = proteinFastaPath
        File? filteredGff = filteredGffPath
    }
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment