From 787d0c99a2293ed04fe2b5820f636f0c63f85f3f Mon Sep 17 00:00:00 2001
From: Wai Yi Leung <w.y.leung@lumc.nl>
Date: Tue, 27 Oct 2015 11:33:20 +0100
Subject: [PATCH] An update on example pipeline

---
 docs/developer/example-pipeline.md            | 112 ++++++++++++++++++
 docs/developer/example-tool.md                |  28 +++--
 .../group/pipelines/BiopetPipeline.scala      |   1 -
 .../group/pipelines/HelloPipeline.scala       |  42 +++++++
 4 files changed, 175 insertions(+), 8 deletions(-)
 create mode 100644 external-example/src/main/scala/org/example/group/pipelines/HelloPipeline.scala

diff --git a/docs/developer/example-pipeline.md b/docs/developer/example-pipeline.md
index 659baea82..ad075e071 100644
--- a/docs/developer/example-pipeline.md
+++ b/docs/developer/example-pipeline.md
@@ -1,10 +1,122 @@
 # Developer - Example pipeline
 
+This tutorial shows you how to add a new pipeline to biopet. The minimum requirements are:
+
+ - A clean biopet checkout from git
+ - A text editor or IntelliJ IDEA
+ 
+### Adding pipeline folder
+
+Via the command line:
+
+```
+cd biopet/public/
+mkdir -p mypipeline/src/main/scala/nl/lumc/sasc/biopet/pipelines/mypipeline
+```
 
 ### Adding maven project
 
+Add a `pom.xml` to the `biopet/public/mypipeline` folder. The example below is the minimum required POM definition:
+
+```xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>Biopet</artifactId>
+        <groupId>nl.lumc.sasc</groupId>
+        <version>0.5.0-SNAPSHOT</version>
+        <relativePath>../</relativePath>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <inceptionYear>2015</inceptionYear>
+    <artifactId>MyPipeline</artifactId>
+    <name>MyPipeline</name>
+    <packaging>jar</packaging>
+
+    <dependencies>
+        <dependency>
+            <groupId>nl.lumc.sasc</groupId>
+            <artifactId>BiopetCore</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>nl.lumc.sasc</groupId>
+            <artifactId>BiopetToolsExtensions</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.testng</groupId>
+            <artifactId>testng</artifactId>
+            <version>6.8</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.scalatest</groupId>
+            <artifactId>scalatest_2.10</artifactId>
+            <version>2.2.1</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+</project>
+```
+
 ### Initial pipeline code
 
+In `biopet/public/mypipeline/src/main/scala/nl/lumc/sasc/biopet/pipelines/mypipeline` create a file named `HelloPipeline.scala` with the following contents:
+
+```scala
+package nl.lumc.sasc.biopet.pipelines.mypipeline
+
+import nl.lumc.sasc.biopet.core.PipelineCommand
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import nl.lumc.sasc.biopet.core.summary.SummaryQScript
+import org.broadinstitute.gatk.queue.QScript
+
+class HelloPipeline(val root: Configurable) extends QScript with SummaryQScript {
+  def this() = this(null)
+
+  /** Only required when using [[SummaryQScript]] */
+  def summaryFile = new File(outputDir, "hello.summary.json")
+
+  /** Only required when using [[SummaryQScript]] */
+  def summaryFiles: Map[String, File] = Map()
+
+  /** Only required when using [[SummaryQScript]] */
+  def summarySettings = Map()
+
+  // This method can be used to initialize some classes where needed
+  def init(): Unit = {
+  }
+
+  // This method is the actual pipeline
+  def biopetScript: Unit = {
+
+    // Executing a sub-pipeline like Shiva
+    val shiva = new Shiva(this)
+    shiva.init()
+    shiva.biopetScript()
+    addAll(shiva.functions)
+
+    /* Only required when using [[SummaryQScript]] */
+    addSummaryQScript(shiva)
+
+    // From here you can use the output files of shiva as input file of other jobs
+  }
+}
+
+//TODO: Replace object Name, must be the same as the class of the pipeline
+object HelloPipeline extends PipelineCommand
+
+```
+
+
+
+
+
 ### Config setup
 
 ### Test pipeline
diff --git a/docs/developer/example-tool.md b/docs/developer/example-tool.md
index a6477b3b3..2c02efdbb 100644
--- a/docs/developer/example-tool.md
+++ b/docs/developer/example-tool.md
@@ -27,7 +27,9 @@ object SimpleTool extends ToolCommand {
 }
 ```
 
-This is the minimum setup for having a working tool. (not functional yet)
+This is the minimum setup for having a working tool. We will place some code for line counting in ``main``. As in other
+high-level programming languages such as Java, C++ and .NET, one needs to specify an entry point for the program to run. Here, ``def main``
+is the first entry point from the command line into your tool.
 
 
 ### Program arguments and environment variables
@@ -40,13 +42,13 @@ In biopet we facilitate an ``AbstractArgs`` case-class which stores the argument
   case class Args(inputFile: File = Nil, outputFile: Option[File] = None) extends AbstractArgs
 ```
 
-The arguments are stored in ``Args``
+The arguments are stored in ``Args``. This is a `case class` which, much like a Java `HashMap`, stores the arguments in an
+object-like fashion.
 
-Then add code that fills the Args.
+Consuming and placing values in `Args` works as follows:
 
 ```scala
   class OptParser extends AbstractOptParser {
-
     head(
       s"""
          |$commandName - Count lines in a textfile
@@ -65,7 +67,11 @@ Then add code that fills the Args.
   }
 ```
 
-In the end your tool would look like the following:
+One has to implement class `OptParser` in order to fill `Args`. In `OptParser` one defines the command-line arguments and how they should be processed.
+ In our example, we just copy the values passed on the commandline. Further reading: [scala scopt](https://github.com/scopt/scopt)
+
+Let's put all the code together into one file and test it with real functional code:
+
 
 ```scala
 
@@ -134,15 +140,22 @@ object SimpleTool extends ToolCommand {
 
 ### Running your new tool
 
+#!TODO: write how to run the tool from a compiled state 
+
+
 ### Debugging the tool with IDEA
 
 ### Setting up unit tests
 
 ### Adding tool-extension for usage in pipeline
 
-When this tool is used in a pipeline in biopet, one has to add a tool wrapper for the tool created.
+In order to use this tool within biopet, one should write an `extension` for the tool (as we also do for normal executables like `bwa-mem`).
  
-The wrapper would look like:
+The wrapper would look like this, basically exposing the same command-line arguments to biopet in an OOP format.
+Note: we also add some functionalities for getting summary data and passing on to biopet.
+
+The concept of having (extension) wrappers is to create a black-box service model. One should only need to know how to interact with the tool, without necessarily knowing its internals.
+
 
 ```scala
 package nl.lumc.sasc.biopet.extensions.tools
@@ -169,6 +182,7 @@ class SimpleTool(val root: Configurable) extends ToolCommandFunction with Summar
   @Output(doc = "Output JSON", shortName = "output", required = true)
   var output: File = _
 
+  // The default amount of core memory this tool starts with.
   override def defaultCoreMemory = 1.0
 
   override def cmdLine = super.cmdLine +
diff --git a/external-example/src/main/scala/org/example/group/pipelines/BiopetPipeline.scala b/external-example/src/main/scala/org/example/group/pipelines/BiopetPipeline.scala
index 6099047a6..f28b4d52f 100644
--- a/external-example/src/main/scala/org/example/group/pipelines/BiopetPipeline.scala
+++ b/external-example/src/main/scala/org/example/group/pipelines/BiopetPipeline.scala
@@ -1,7 +1,6 @@
 package org.example.group.pipelines
 
 import nl.lumc.sasc.biopet.core.PipelineCommand
-import nl.lumc.sasc.biopet.utils.config.Configurable
 import nl.lumc.sasc.biopet.core.summary.SummaryQScript
 import nl.lumc.sasc.biopet.pipelines.shiva.Shiva
 import nl.lumc.sasc.biopet.utils.config.Configurable
diff --git a/external-example/src/main/scala/org/example/group/pipelines/HelloPipeline.scala b/external-example/src/main/scala/org/example/group/pipelines/HelloPipeline.scala
new file mode 100644
index 000000000..cd4a158c1
--- /dev/null
+++ b/external-example/src/main/scala/org/example/group/pipelines/HelloPipeline.scala
@@ -0,0 +1,42 @@
+package org.example.group.pipelines
+
+import nl.lumc.sasc.biopet.core.PipelineCommand
+import nl.lumc.sasc.biopet.core.summary.SummaryQScript
+import nl.lumc.sasc.biopet.extensions.Fastqc
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.queue.QScript
+
+/** Minimal example pipeline that configures and queues a single FastQC job. */
+class HelloPipeline(val root: Configurable) extends QScript with SummaryQScript {
+  def this() = this(null)
+
+  /** Only required when using [[SummaryQScript]] */
+  def summaryFile = new File(outputDir, "hello.summary.json")
+
+  /** Only required when using [[SummaryQScript]] */
+  def summaryFiles: Map[String, File] = Map()
+
+  /** Only required when using [[SummaryQScript]] */
+  def summarySettings = Map()
+
+  // This method can be used to initialize some classes where needed
+  def init(): Unit = {
+  }
+
+  // This method is the actual pipeline
+  def biopetScript: Unit = {
+    // Executing a tool like FastQC, calling the extension in `nl.lumc.sasc.biopet.extensions.Fastqc`
+    val fastqc = new Fastqc(this)
+    // The input fastq file is read from the config value "fastqc_input"
+    fastqc.fastqfile = config("fastqc_input")
+    fastqc.output = new File(outputDir, "fastqc.txt")
+
+    // Queue the job; a function that is configured but never added is never executed
+    add(fastqc)
+
+    // From here you can use the output file of fastqc as input file of other jobs
+  }
+}
+
+//TODO: Replace object Name, must be the same as the class of the pipeline
+object HelloPipeline extends PipelineCommand
\ No newline at end of file
-- 
GitLab