From ccf09c1a2d004df38e315c75998de54644b196c6 Mon Sep 17 00:00:00 2001
From: Wai Yi Leung <w.y.leung@lumc.nl>
Date: Thu, 29 Jan 2015 16:09:32 +0100
Subject: [PATCH] Fix newline handling in summary and introduce RPKMonly option
 to generate RPKM files.

---
 public/kopisu/Kopisu.iml                      | 92 -------------------
 .../pipelines/kopisu/ConiferPipeline.scala    | 65 +++++++------
 .../pipelines/kopisu/ConiferSummary.scala     |  3 +-
 3 files changed, 38 insertions(+), 122 deletions(-)
 delete mode 100644 public/kopisu/Kopisu.iml

diff --git a/public/kopisu/Kopisu.iml b/public/kopisu/Kopisu.iml
deleted file mode 100644
index dc0afbee2..000000000
--- a/public/kopisu/Kopisu.iml
+++ /dev/null
@@ -1,92 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
-  <component name="NewModuleRootManager" inherit-compiler-output="false">
-    <output url="file://$MODULE_DIR$/target/classes" />
-    <output-test url="file://$MODULE_DIR$/target/test-classes" />
-    <content url="file://$MODULE_DIR$">
-      <sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
-      <sourceFolder url="file://$MODULE_DIR$/src/main/scala" isTestSource="false" />
-      <excludeFolder url="file://$MODULE_DIR$/target" />
-    </content>
-    <orderEntry type="inheritedJdk" />
-    <orderEntry type="sourceFolder" forTests="false" />
-    <orderEntry type="module" module-name="BiopetFramework" />
-    <orderEntry type="library" name="Maven: org.scala-lang:scala-library:2.11.2" level="project" />
-    <orderEntry type="library" name="Maven: org.broadinstitute.gatk:gatk-queue:3.3" level="project" />
-    <orderEntry type="library" name="Maven: org.broadinstitute.gatk:gatk-tools-public:3.3" level="project" />
-    <orderEntry type="library" name="Maven: org.broadinstitute.gatk:gatk-engine:3.3" level="project" />
-    <orderEntry type="library" name="Maven: org.scala-lang:scala-compiler:2.10.2" level="project" />
-    <orderEntry type="library" name="Maven: org.scala-lang:scala-reflect:2.10.2" level="project" />
-    <orderEntry type="library" name="Maven: log4j:log4j:1.2.15" level="project" />
-    <orderEntry type="library" name="Maven: net.sf.jgrapht:jgrapht:0.8.3" level="project" />
-    <orderEntry type="library" name="Maven: org.apache.commons:commons-email:1.2" level="project" />
-    <orderEntry type="library" name="Maven: javax.activation:activation:1.1" level="project" />
-    <orderEntry type="library" name="Maven: javax.mail:mail:1.4.4" level="project" />
-    <orderEntry type="module-library">
-      <library name="Maven: com.sun:tools:1.4.2">
-        <CLASSES>
-          <root url="jar:///usr/lib/jvm/java-7-openjdk-amd64/lib/tools.jar!/" />
-        </CLASSES>
-        <JAVADOC />
-        <SOURCES />
-      </library>
-    </orderEntry>
-    <orderEntry type="library" name="Maven: org.broadinstitute.gatk:gatk-queue-extensions-distribution:3.3" level="project" />
-    <orderEntry type="library" name="Maven: org.broadinstitute.gatk:gatk-tools-protected:3.3" level="project" />
-    <orderEntry type="library" name="Maven: gov.nist.math:jama:1.0.2" level="project" />
-    <orderEntry type="library" scope="RUNTIME" name="Maven: org.broadinstitute.gatk:gatk-queue-extensions-generator:3.3" level="project" />
-    <orderEntry type="library" name="Maven: org.broadinstitute.gatk:gatk-utils:3.3" level="project" />
-    <orderEntry type="library" name="Maven: samtools:htsjdk:1.120.1620" level="project" />
-    <orderEntry type="library" name="Maven: org.xerial.snappy:snappy-java:1.0.3-rc3" level="project" />
-    <orderEntry type="library" name="Maven: picard:picard:1.120.1579" level="project" />
-    <orderEntry type="library" name="Maven: org.apache.ant:ant:1.8.2" level="project" />
-    <orderEntry type="module-library">
-      <library name="Maven: com.sun:tools.jar:1.5">
-        <CLASSES>
-          <root url="jar:///usr/lib/jvm/java-7-openjdk-amd64/lib/tools.jar!/" />
-        </CLASSES>
-        <JAVADOC />
-        <SOURCES />
-      </library>
-    </orderEntry>
-    <orderEntry type="library" name="Maven: colt:colt:1.2.0" level="project" />
-    <orderEntry type="library" name="Maven: concurrent:concurrent:1.3.4" level="project" />
-    <orderEntry type="library" name="Maven: it.unimi.dsi:fastutil:6.5.3" level="project" />
-    <orderEntry type="library" name="Maven: org.simpleframework:simple-xml:2.0.4" level="project" />
-    <orderEntry type="library" name="Maven: stax:stax-api:1.0.1" level="project" />
-    <orderEntry type="library" name="Maven: stax:stax:1.2.0" level="project" />
-    <orderEntry type="library" name="Maven: org.reflections:reflections:0.9.9-RC1" level="project" />
-    <orderEntry type="library" name="Maven: org.javassist:javassist:3.16.1-GA" level="project" />
-    <orderEntry type="library" name="Maven: dom4j:dom4j:1.6.1" level="project" />
-    <orderEntry type="library" name="Maven: xml-apis:xml-apis:1.0.b2" level="project" />
-    <orderEntry type="library" name="Maven: org.slf4j:slf4j-log4j12:1.6.1" level="project" />
-    <orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.6.1" level="project" />
-    <orderEntry type="library" name="Maven: org.freemarker:freemarker:2.3.18" level="project" />
-    <orderEntry type="library" name="Maven: org.apache.commons:commons-jexl:2.1.1" level="project" />
-    <orderEntry type="library" name="Maven: commons-logging:commons-logging:1.1.1" level="project" />
-    <orderEntry type="library" name="Maven: commons-lang:commons-lang:2.5" level="project" />
-    <orderEntry type="library" name="Maven: commons-io:commons-io:2.1" level="project" />
-    <orderEntry type="library" name="Maven: commons-collections:commons-collections:3.2.1" level="project" />
-    <orderEntry type="library" name="Maven: org.apache.commons:commons-math:2.2" level="project" />
-    <orderEntry type="library" name="Maven: net.java.dev.jna:jna:3.2.7" level="project" />
-    <orderEntry type="library" name="Maven: net.java.dev.jets3t:jets3t:0.8.1" level="project" />
-    <orderEntry type="library" name="Maven: commons-codec:commons-codec:1.3" level="project" />
-    <orderEntry type="library" name="Maven: commons-httpclient:commons-httpclient:3.1" level="project" />
-    <orderEntry type="library" name="Maven: com.jamesmurty.utils:java-xmlbuilder:0.4" level="project" />
-    <orderEntry type="library" name="Maven: us.levk:drmaa-gridengine:6.2u5" level="project" />
-    <orderEntry type="library" name="Maven: us.levk:drmaa-common:1.0" level="project" />
-    <orderEntry type="library" name="Maven: com.google.code.gson:gson:2.2.2" level="project" />
-    <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpclient:4.1.1" level="project" />
-    <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore:4.1" level="project" />
-    <orderEntry type="library" name="Maven: com.google.code.cofoja:cofoja:1.0-r139" level="project" />
-    <orderEntry type="library" name="Maven: io.argonaut:argonaut_2.11:6.1-M4" level="project" />
-    <orderEntry type="library" name="Maven: org.scalaz:scalaz-core_2.11:7.1.0" level="project" />
-    <orderEntry type="library" name="Maven: org.scala-lang.modules:scala-parser-combinators_2.11:1.0.2" level="project" />
-    <orderEntry type="library" name="Maven: org.scala-lang.modules:scala-xml_2.11:1.0.2" level="project" />
-    <orderEntry type="library" name="Maven: com.github.julien-truffaut:monocle-core_2.11:0.5.0" level="project" />
-    <orderEntry type="library" name="Maven: org.biojava:biojava3-core:3.1.0" level="project" />
-    <orderEntry type="library" name="Maven: org.biojava:biojava3-sequencing:3.1.0" level="project" />
-    <orderEntry type="library" name="Maven: com.google.guava:guava:18.0" level="project" />
-    <orderEntry type="library" name="Maven: com.github.scopt:scopt_2.10:3.2.0" level="project" />
-  </component>
-</module>
\ No newline at end of file
diff --git a/public/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferPipeline.scala b/public/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferPipeline.scala
index ec4464a57..c45de4eac 100644
--- a/public/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferPipeline.scala
+++ b/public/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferPipeline.scala
@@ -26,7 +26,9 @@ import org.broadinstitute.gatk.queue.QScript
 import scala.io.Source
 
 class ConiferPipeline(val root: Configurable) extends QScript with BiopetQScript {
-
+  /*
+   * Kopisu - ConiferPipeline is a pipeline that can run standalone.
+   */
   def this() = this(null)
 
   /** Input bamfile  */
@@ -39,11 +41,14 @@ class ConiferPipeline(val root: Configurable) extends QScript with BiopetQScript
   var sampleLabel: String = _
 
   /** Exon definitions in bed format */
-  @Input(doc = "Exon definition file in bed format", fullName = "exon_bed", shortName = "bed", required = true)
-  var probeFile: File = _
+  @Input(doc = "Exon definition file in bed format", fullName = "exon_bed", shortName = "bed", required = false)
+  var probeFile: File = config("probeFile")
+
+  @Input(doc = "Previous RPKM files (controls)", fullName = "rpkm_controls", shortName = "rc", required = false)
+  var controlsDir: File = config("controlsDir")
 
-  @Input(doc = "Previous RPKM files (controls)", fullName = "rpkm_controls", shortName = "rc", required = true)
-  var rpkmControls: File = _
+  @Argument(doc = "Enable RPKM only mode, generate files for reference db", shortName = "rpkmonly", required = false)
+  var RPKMonly: Boolean = false
 
   val summary = new ConiferSummary(this)
 
@@ -78,35 +83,37 @@ class ConiferPipeline(val root: Configurable) extends QScript with BiopetQScript
     coniferRPKM.output = new File(RPKMdir + File.separator + input2RPKM(inputBam))
     add(coniferRPKM)
 
-    /** Collect the rpkm_output to a temp directory, where we merge with the control files */
-    var refRPKMlist: List[File] = Nil
-    for (f <- rpkmControls.listFiles()) {
-      var target = new File(RPKMdir + File.separator + f.getName)
-      if (!target.exists()) {
-        logger.info("Creating " + target.getAbsolutePath)
-        add(Ln(this, f, target, true))
-        refRPKMlist :+= target
+    if (!RPKMonly) {
+      /** Collect the rpkm_output to a temp directory, where we merge with the control files */
+      var refRPKMlist: List[File] = Nil
+      for (f <- controlsDir.listFiles()) {
+        var target = new File(RPKMdir + File.separator + f.getName)
+        if (!target.exists()) {
+          logger.info("Creating " + target.getAbsolutePath)
+          add(Ln(this, f, target, true))
+          refRPKMlist :+= target
+        }
       }
-    }
 
-    val coniferAnalyze = new ConiferAnalyze(this)
-    coniferAnalyze.deps = List(coniferRPKM.output) ++ refRPKMlist
-    coniferAnalyze.probes = this.probeFile
-    coniferAnalyze.rpkmDir = RPKMdir
-    coniferAnalyze.output = new File(sampleDir + File.separator + input2HDF5(inputBam))
-    add(coniferAnalyze)
+      val coniferAnalyze = new ConiferAnalyze(this)
+      coniferAnalyze.deps = List(coniferRPKM.output) ++ refRPKMlist
+      coniferAnalyze.probes = this.probeFile
+      coniferAnalyze.rpkmDir = RPKMdir
+      coniferAnalyze.output = new File(sampleDir + File.separator + input2HDF5(inputBam))
+      add(coniferAnalyze)
 
-    val coniferCall = new ConiferCall(this)
-    coniferCall.input = coniferAnalyze.output
-    coniferCall.output = new File(sampleDir + File.separator + "calls.txt")
-    add(coniferCall)
+      val coniferCall = new ConiferCall(this)
+      coniferCall.input = coniferAnalyze.output
+      coniferCall.output = new File(sampleDir + File.separator + "calls.txt")
+      add(coniferCall)
 
-    summary.deps = List(coniferCall.output)
-    summary.label = sampleLabel
-    summary.calls = coniferCall.output
-    summary.out = new File(sampleDir + File.separator + input2Calls(inputBam))
+      summary.deps = List(coniferCall.output)
+      summary.label = sampleLabel
+      summary.calls = coniferCall.output
+      summary.out = new File(sampleDir + File.separator + input2Calls(inputBam))
 
-    add(summary)
+      add(summary)
+    }
 
   }
 }
diff --git a/public/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferSummary.scala b/public/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferSummary.scala
index 1691a19df..78ffcbb29 100644
--- a/public/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferSummary.scala
+++ b/public/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferSummary.scala
@@ -31,10 +31,11 @@ class ConiferSummary(val root: Configurable) extends InProcessFunction with Conf
 
     for (line <- Source.fromFile(callFile).getLines()) {
       line.startsWith(sampleName) || line.startsWith("sampleID") match {
-        case true => writer.write(line)
+        case true => writer.write(line + "\n");
         case _    =>
       }
     }
+    writer.close()
   }
 
   this.analysisName = getClass.getSimpleName
-- 
GitLab