diff --git a/CHANGELOG.md b/CHANGELOG.md index e9805814a7b8ff3e9ef195f9139dc4ac558fa782..1dca7ae81f07019e24764e5f6927f8508cd3b374 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 2.0.0 --------------------------- ++ TranscriptClean: Update TranscriptClean to version 2.0.2 + Memory runtime attributes are now Strings indicating total memory, as opposed to Ints indicating memory per core. + Memory inputs for most tasks are now Strings, remaining Int memory inputs are renamed to "memoryGb". + Use the biowdl-input-converter container for JsonToYaml, to reduce the amount of containers needed. diff --git a/transcriptclean.wdl b/transcriptclean.wdl index 48f5c525d8dc1cbb0b368be391e64b040e03783a..f0053b25e9cb73f780180ca9a0090a4845bd9634 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -20,88 +20,6 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -task CleanSpliceJunctions { - input { - File SAMfile - File referenceGenome - String outputPrefix - File spliceJunctionAnnotation - - File? variantFile - - Int cores = 1 - String memory = "4G" - String dockerImage = "biocontainers/transcriptclean:v1.0.8_cv1" - } - - command { - set -e - mkdir -p $(dirname ~{outputPrefix}) - clean_splice_jns \ - ~{"--f=" + SAMfile} \ - ~{"--g=" + referenceGenome} \ - ~{"--o=" + outputPrefix} \ - ~{"--s=" + spliceJunctionAnnotation} \ - ~{"--v=" + variantFile} - } - - output { - File outputCleanedSAM = outputPrefix + "_clean.sam" - } - - runtime { - cpu: cores - memory: memory - docker: dockerImage - } - - parameter_meta { - SAMfile: "Input SAM file" - referenceGenome: "Reference genome fasta file." - outputPrefix: "Output directory path + output file prefix." - spliceJunctionAnnotation: "Splice junction file" - variantFile: "VCF formatted file of variants" - - outputCleanedSAM: "Cleaned sam output file." 
- } -} - -task GetCorrectedSJsFromLog { - input { - File TElogFile - String outputPrefix - - Int cores = 1 - String memory = "5G" - String dockerImage = "biocontainers/transcriptclean:v1.0.8_cv1" - } - - command { - set -e - mkdir -p $(dirname ~{outputPrefix}) - get_corrected_SJs_from_log \ - ~{TElogFile} \ - ~{outputPrefix + ".tsv"} - } - - output { - File outputCorrectedSJs = outputPrefix + ".tsv" - } - - runtime { - cpu: cores - memory: memory - docker: dockerImage - } - - parameter_meta { - TElogFile: "TE log from TranscriptClean." - outputPrefix: "Output directory path + output file prefix." - - outputCorrectedSJs: "Formely noncanonical splice junctions in BED format." - } -} - task GetSJsFromGtf { input { File GTFfile @@ -111,7 +29,7 @@ task GetSJsFromGtf { Int cores = 1 String memory = "8G" - String dockerImage = "biocontainers/transcriptclean:v1.0.8_cv1" + String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } command { @@ -151,7 +69,7 @@ task GetTranscriptCleanStats { Int cores = 1 String memory = "4G" - String dockerImage = "biocontainers/transcriptclean:v1.0.8_cv1" + String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } command { @@ -189,16 +107,18 @@ task TranscriptClean { Int maxSJoffset = 5 Boolean correctMismatches = true Boolean correctIndels = true + Boolean correctSJs = true Boolean dryRun = false Boolean primaryOnly = false + Boolean canonOnly = false + Int bufferSize = 100 File? spliceJunctionAnnotation File? variantFile - Boolean? 
correctSJs Int cores = 1 String memory = "25G" - String dockerImage = "biocontainers/transcriptclean:v1.0.8_cv1" + String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } command { @@ -212,11 +132,14 @@ task TranscriptClean { ~{"-v " + variantFile} \ ~{"--maxLenIndel=" + maxLenIndel} \ ~{"--maxSJOffset=" + maxSJoffset} \ - ~{true="-m CORRECTMISMATCHES" false="-m false" correctMismatches} \ - ~{true="-i CORRECTINDELS" false="-i false" correctIndels} \ - ~{true="--correctSJs=CORRECTSJS" false="--correctSJs=false" correctSJs} \ + ~{true="-m true" false="-m false" correctMismatches} \ + ~{true="-i true" false="-i false" correctIndels} \ + ~{true="--correctSJs=true" false="--correctSJs=false" correctSJs} \ ~{true="--dryRun" false="" dryRun} \ - ~{true="--primaryOnly" false="" primaryOnly} + ~{true="--primaryOnly" false="" primaryOnly} \ + ~{true="--canonOnly" false="" canonOnly} \ + ~{"--bufferSize=" + bufferSize} \ + ~{"-t " + cores} } output { @@ -236,7 +159,8 @@ task TranscriptClean { SAMfile: "Input SAM file containing transcripts to correct." referenceGenome: "Reference genome fasta file." outputPrefix: "Output directory path + output file prefix." - spliceJunctionAnnotation: "Splice junction file" + spliceJunctionAnnotation: "Splice junction file." + variantFile: "VCF formatted file of variants." maxLenIndel: "Maximum size indel to correct." maxSJoffset: "Maximum distance from annotated splice junction to correct." correctMismatches: "Set this to make TranscriptClean correct mismatches." @@ -244,6 +168,8 @@ task TranscriptClean { correctSJs: "Set this to make TranscriptClean correct splice junctions." dryRun: "TranscriptClean will read in the data but don't do any correction." primaryOnly: "TranscriptClean will only output primary mappings of transcripts." + canonOnly: "TranscriptClean will output only canonical transcripts and transcripts containing annotated noncanonical junctions." 
+ bufferSize: "Number of lines to output to file at once by each thread during run." outputTranscriptCleanFasta: "Fasta file containing corrected reads." outputTranscriptCleanLog: "Log file of TranscriptClean run."