diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeOtuMaps.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeOtuMaps.scala new file mode 100644 index 0000000000000000000000000000000000000000..70f452b2537ce6943fd164d7dc1963d8c611f450 --- /dev/null +++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeOtuMaps.scala @@ -0,0 +1,40 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. 
/**
 * Extension wrapper around the [[nl.lumc.sasc.biopet.tools.MergeOtuMaps]] tool, exposing its
 * input and output files as annotated fields and assembling the matching command line.
 *
 * @param root the [[Configurable]] this function is created from (presumably the calling
 *             pipeline — confirm against ToolCommandFunction)
 */
class MergeOtuMaps(val root: Configurable) extends ToolCommandFunction {

  /** The tool object whose command line this function builds on. */
  def toolObject = nl.lumc.sasc.biopet.tools.MergeOtuMaps

  /** Files to merge; handed to `repeat("-I", ...)` when the command line is built. */
  @Input(doc = "Input", shortName = "input", required = true)
  var input: List[File] = List.empty

  /** Destination of the merged result; handed to `required("-o", ...)`. */
  @Output(doc = "Output", shortName = "output", required = true)
  var output: File = _

  /** Overrides the inherited default with 6.0 (unit is defined by the parent — presumably GB, confirm). */
  override def defaultCoreMemory = 6.0

  /** Parent command line followed by the `-I` arguments for the inputs and the `-o` argument for the output. */
  override def cmdLine = {
    // Evaluated in the same left-to-right order as a single concatenation expression would be.
    val baseCommand = super.cmdLine
    val inputArguments = repeat("-I", input)
    val outputArgument = required("-o", output)
    baseCommand + inputArguments + outputArgument
  }
}
/**
 * Command line tool that merges several tab-separated map files into one.
 *
 * The first column of every line is parsed as a `Long` id. Everything after that id is kept
 * verbatim, and the remainders of all lines that share an id (within one file or across files)
 * are concatenated onto a single output line.
 */
object MergeOtuMaps extends ToolCommand {

  /**
   * Parsed command line arguments.
   *
   * @param inputFiles files to merge; the parser prepends, so this list is in reverse command line order
   * @param outputFile file to write; `null` only until the (required) `-o` option has been parsed
   */
  case class Args(inputFiles: List[File] = Nil, outputFile: File = null) extends AbstractArgs

  /** Option parser accepting repeated `-I/--input` files and a single `-o/--output` file. */
  class OptParser extends AbstractOptParser {
    // NOTE(review): in scopt `required ()` is defined as `minOccurs(1)`, so placing it after
    // `minOccurs 2` presumably lowers the effective minimum back to one input — confirm whether
    // a two-file minimum is really intended before reordering; behaviour is left untouched here.
    opt[File]('I', "input") minOccurs 2 required () unbounded () valueName "<file>" action { (x, c) =>
      c.copy(inputFiles = x :: c.inputFiles)
    }
    opt[File]('o', "output") required () unbounded () maxOccurs 1 valueName "<file>" action { (x, c) =>
      c.copy(outputFile = x)
    }
  }

  /**
   * Folds the lines of one input into the accumulated id -> remainder map.
   *
   * The remainder of a new line is placed in front of what was already collected for its id.
   * Blank (whitespace-only) lines are skipped instead of failing on an empty id.
   *
   * @param merged entries collected so far
   * @param lines  raw lines of one input file
   * @return `merged` extended with the content of `lines`
   * @throws NumberFormatException if the first column of a non-blank line is not a valid `Long`
   */
  private def mergeLines(merged: Map[Long, String], lines: Iterator[String]): Map[Long, String] =
    lines.filter(_.trim.nonEmpty).foldLeft(merged) { (acc, line) =>
      val idText = line.split("\t", 2).head
      val key = idText.toLong
      // Cut off the id exactly as it was written. Re-rendering the parsed Long would not match
      // ids such as "007" or "+5", leaving the id column duplicated in the output.
      val remainder = line.drop(idText.length)
      acc + (key -> (remainder + acc.getOrElse(key, "")))
    }

  /**
   * Reads all input files, merges them by id and writes one line per id to the output file.
   *
   * Files are processed in the order of `Args.inputFiles` (reverse command line order); because
   * new content is put in front of existing content, values from files given earlier on the
   * command line end up first on each output line. Output lines follow the iteration order of
   * the map; no explicit sorting is applied.
   *
   * @param args the command line arguments
   */
  def main(args: Array[String]): Unit = {
    val argsParser = new OptParser
    val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)

    val merged = commandArgs.inputFiles.foldLeft(Map.empty[Long, String]) { (acc, inputFile) =>
      logger.info(s"Start reading $inputFile")
      val reader = Source.fromFile(inputFile)
      // The fold consumes the iterator completely before returning, so closing in `finally`
      // is safe and also releases the handle when a line fails to parse.
      try mergeLines(acc, reader.getLines())
      finally reader.close()
    }

    logger.info(s"Start writing to ${commandArgs.outputFile}")
    val writer = new PrintWriter(commandArgs.outputFile)
    try {
      merged.foreach { case (key, list) => writer.println(key + list) }
      // PrintWriter never throws on write failures; checkError() flushes and reports them.
      if (writer.checkError())
        throw new java.io.IOException(s"Failed to write ${commandArgs.outputFile}")
    } finally writer.close()
  }
}
MergeOtuTables } +import nl.lumc.sasc.biopet.extensions.qiime.MergeOtuTables import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript @@ -56,7 +57,7 @@ class Gears(val root: Configurable) extends QScript with MultiSampleQScript { qs val mergeMaps = new MergeOtuMaps(qscript) mergeMaps.input = closedOtuMaps - mergeMaps.outputFile = closedOtuMap + mergeMaps.output = closedOtuMap add(mergeMaps) } else {