From 9cb010e94e60504548d66f7d7e75661e3533f5ab Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Tue, 17 May 2016 13:27:23 +0200 Subject: [PATCH] Add gtf to refflat --- .../nl/lumc/sasc/biopet/extensions/Awk.scala | 41 +++++++++++++++++++ .../biopet/extensions/GtfToGenePred.scala | 39 ++++++++++++++++++ .../generateindexes/GenerateIndexes.scala | 7 +++- 3 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Awk.scala create mode 100644 biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/GtfToGenePred.scala diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Awk.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Awk.scala new file mode 100644 index 000000000..594e6f564 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Awk.scala @@ -0,0 +1,41 @@ +package nl.lumc.sasc.biopet.extensions + +import java.io.File + +import nl.lumc.sasc.biopet.core.{BiopetCommandLineFunction, Version} +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{Input, Output} + +import scala.util.matching.Regex + +/** + * Created by pjvan_thof on 17-5-16. 
+ * Command line wrapper that runs a single awk program, optionally on one input file. */
+class Awk(val root: Configurable) extends BiopetCommandLineFunction with Version {
+  executable = config("exe", default = "awk", freeVar = false)
+
+  def versionCommand: String = s"$executable --version"
+  def versionRegex: Regex = new Regex("""(GNU Awk \d+\.\d+\.\d+)""")
+
+  @Input(required = false)
+  var input: File = _
+  @Output
+  var output: File = _
+  // The awk program text; rendered directly after the executable.
+  var command: String = _
+  // The input file is left out when reading stdin, the "> output" redirect when writing stdout.
+  def cmdLine = {
+    val program = required(command)
+    val source = if (inputAsStdin) "" else required(input)
+    val sink = if (outputAsStsout) "" else " > " + required(output)
+    executable + program + source + sink
+  }
+}
+
+object Awk {
+  def apply(root: Configurable, command: String): Awk = {
+    val job = new Awk(root)
+    job.command = command
+    job
+  }
+}
\ No newline at end of file diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/GtfToGenePred.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/GtfToGenePred.scala new file mode 100644 index 000000000..3593ef84b --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/GtfToGenePred.scala @@ -0,0 +1,39 @@ +package nl.lumc.sasc.biopet.extensions + +import java.io.File + +import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } + +/** + * Created by pjvan_thof on 17-5-16. 
+ * Command line wrapper for the UCSC gtfToGenePred tool (usage: gtfToGenePred gtf genePred). */
+class GtfToGenePred(val root: Configurable) extends BiopetCommandLineFunction {
+  executable = config("exe", default = "gtfToGenePred", freeVar = false)
+  // GTF input(s); rendered as positional argument(s) right before the output.
+  @Input
+  var inputGtfs: List[File] = Nil
+  // genePred output path; /dev/stdout is used instead when the job writes to stdout (piped).
+  @Output
+  var outputGenePred: File = _
+  // Optional file handed to the -infoOut flag.
+  @Output
+  var infoOut: Option[File] = None
+
+  var genePredExt: Boolean = config("gene_pred_ext", default = false)
+  var allErrors: Boolean = config("all_errors", default = false)
+  var impliedStopAfterCds: Boolean = config("implied_stop_after_cds", default = false)
+  var simple: Boolean = config("simple", default = false)
+  var geneNameAsName2: Boolean = config("gene_name_as_name2", default = false)
+  // Flags first, then the positional input(s) and output; the tool takes no shell redirect.
+  def cmdLine = executable +
+    conditional(genePredExt, "-genePredExt") +
+    conditional(allErrors, "-allErrors") +
+    optional("-infoOut", infoOut) +
+    conditional(impliedStopAfterCds, "-impliedStopAfterCds") +
+    conditional(simple, "-simple") +
+    conditional(geneNameAsName2, "-geneNameAsName2") +
+    inputGtfs.map(gtf => required(gtf)).mkString +
+    (if (outputAsStsout) " /dev/stdout" else required(outputGenePred))
+}
diff --git a/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/generateindexes/GenerateIndexes.scala b/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/generateindexes/GenerateIndexes.scala index f4a0e0406..38eb6178c 100644 --- a/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/generateindexes/GenerateIndexes.scala +++ b/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/generateindexes/GenerateIndexes.scala @@ -208,9 +208,12 @@ class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript val refFlatFile: Option[File] = gtfFile.map { gtf => val refFlat = new File(gtf + ".refFlat") - //TODO: gtf to refFlat conversion + val gtfToGenePred = new GtfToGenePred(this) + gtfToGenePred.inputGtfs :+= gtf - outputConfig += "ribosome_refflat" -> refFlat + add(gtfToGenePred | Awk(this, """{ print $12"\t"$1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$9"\t"$10 
}""") > refFlat) + + outputConfig += "annotation_refflat" -> refFlat refFlat } -- GitLab