diff --git a/biopet-framework/.gitignore b/biopet-framework/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..a6f89c2da7a029afa02b6e7a2bf80ad34958a311 --- /dev/null +++ b/biopet-framework/.gitignore @@ -0,0 +1 @@ +/target/ \ No newline at end of file diff --git a/biopet-framework/README b/biopet-framework/README new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/biopet-framework/pom.xml b/biopet-framework/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..9de3ff86ab1de3b6433ecb655ec34a78370f09a9 --- /dev/null +++ b/biopet-framework/pom.xml @@ -0,0 +1,113 @@ +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <groupId>nl.lumc.sasc</groupId> + <artifactId>Biopet-Framework</artifactId> + <version>0.1.0</version> + <packaging>jar</packaging> + + <name>Biopet-Framework</name> + <url>http://maven.apache.org</url> + + <parent> + <groupId>nl.lumc.sasc</groupId> + <artifactId>Biopet</artifactId> + <version>0.1.0</version> + <relativePath>../</relativePath> + </parent> + + <properties> + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> + <sting.unpack.phase>prepare-package</sting.unpack.phase> + <sting.shade.phase>package</sting.shade.phase> + <sting.binary-dist.name>SASC-Pipelines</sting.binary-dist.name> + <app.main.class>org.broadinstitute.sting.queue.QCommandLine</app.main.class> + </properties> + + <dependencies> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>3.8.1</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.scala-lang</groupId> + <artifactId>scala-compiler</artifactId> + <version>2.11.0</version> + </dependency> + <dependency> + <groupId>org.scalatest</groupId> 
+ <artifactId>scalatest_2.9.2</artifactId> + <version>2.0.M4</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.broadinstitute.sting</groupId> + <artifactId>queue-package</artifactId> + <version>3.1</version> + </dependency> + </dependencies> + <build> + <plugins> + <plugin> + <groupId>org.scala-tools</groupId> + <artifactId>maven-scala-plugin</artifactId> + <version>2.15.2</version> + <executions> + <execution> + <id>scala-compile</id> + <goals> + <goal>compile</goal> + <goal>testCompile</goal> + </goals> + <configuration> + <args> +<!-- <arg>-make:transitive</arg>--> + <arg>-dependencyfile</arg> + <arg>${project.build.directory}/.scala_dependencies</arg> + </args> + </configuration> + </execution> + </executions> + </plugin> +<!-- <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-surefire-plugin</artifactId> + <version>2.7.2</version> + <executions> + <execution> + <id>default-test</id> + Disable the default-test by putting it in phase none + <phase>none</phase> + </execution> + </executions> + </plugin>--> +<!-- <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <version>2.3</version> + <configuration> + <transformers> + <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> + <manifestEntries> + <Main-Class>${app.main.class}</Main-Class> + <X-Compile-Source-JDK>${maven.compile.source}</X-Compile-Source-JDK> + <X-Compile-Target-JDK>${maven.compile.target}</X-Compile-Target-JDK> + </manifestEntries> + </transformer> + </transformers> + </configuration> + <executions> + <execution> + <phase>package</phase> + <goals> + <goal>shade</goal> + </goals> + </execution> + </executions> + </plugin>--> + </plugins> + </build> +</project> diff --git a/biopet-framework/src/main/java/nl/lumc/sasc/biopet/core/Config.scala b/biopet-framework/src/main/java/nl/lumc/sasc/biopet/core/Config.scala new file mode 100644 index 
0000000000000000000000000000000000000000..b9774480390fa9e34dd5255506dea1f814ae8d2d
--- /dev/null
+++ b/biopet-framework/src/main/java/nl/lumc/sasc/biopet/core/Config.scala
@@ -0,0 +1,184 @@
+package nl.lumc.sasc.biopet.core
+
+import scala.util.parsing.json._
+import java.io.File
+import org.broadinstitute.sting.queue.util.Logging
+
+/**
+ * Key/value configuration held in an immutable map that is replaced whenever a file is loaded.
+ *
+ * Files are parsed with `JSON.parseFull`; values that are themselves maps can be wrapped as
+ * sub-configs with `getAsConfig`.
+ *
+ * @param map the current configuration values
+ */
+class Config(var map: Map[String,Any]) extends Logging {
+  /** Creates an empty config, then loads the file named by the QUEUE_CONFIG environment variable. */
+  def this() = {
+    this(Map())
+    logger.info("Init phase of config")
+    loadDefaultConfig()
+  }
+
+  /** Loads the file named by QUEUE_CONFIG; only logs a warning when it is unset or does not exist. */
+  def loadDefaultConfig(): Unit = {
+    Option(System.getenv("QUEUE_CONFIG")) match {
+      case Some(globalFile) =>
+        val file = new File(globalFile)
+        if (file.exists()) loadConfigFile(file)
+        else logger.warn("QUEUE_CONFIG value found but file does not exist, no glogal config is loaded")
+      case None => logger.warn("QUEUE_CONFIG value not found, no glogal config is loaded")
+    }
+  }
+
+  /** @return true when `s` is a top-level key of this config */
+  def contains(s:String) : Boolean = map.contains(s)
+
+  /**
+   * Parses `configFile` as JSON and merges it into this config. On conflicting keys the value
+   * from the file wins, except that two maps under the same key are merged recursively.
+   *
+   * @throws IllegalStateException when the file is not valid JSON or its top level is not a map
+   */
+  def loadConfigFile(configFile:File): Unit = {
+    val source = scala.io.Source.fromFile(configFile)
+    // Release the file handle even when reading or parsing fails.
+    val configJson = try JSON.parseFull(source.mkString) finally source.close()
+    this.logger.debug("Jsonfile: " + configJson)
+    // parseFull reports a syntax error as None; name the file instead of failing inside Option.get.
+    val returnMap = configJson match {
+      case Some(json) => Config.valueToMap(json)
+      case None => throw new IllegalStateException("Config file '" + configFile + "' is not valid JSON")
+    }
+
+    map = Config.mergMaps(returnMap, map)
+
+    this.logger.debug("config: " + map)
+  }
+
+  /** @return the raw value stored under `s`; the map lookup throws when the key is absent */
+  def get(s:String) : Any = map(s)
+  /** @return the raw value stored under `s`, or `default` when the key is absent */
+  def get(s:String, default:Any) : Any = if (contains(s)) get(s) else default
+
+  /** @return the value under `s` converted with `toString` */
+  def getAsString(s:String) : String = map(s).toString
+  /** @return the value under `s` as a string, or `default` when the key is absent */
+  def getAsString(s:String, default:String) : String = if (contains(s)) getAsString(s) else default
+
+  /**
+   * @return the value under `s` as an Int; a Double is truncated and a String is parsed
+   * @throws IllegalStateException when the value has any other type
+   */
+  def getAsInt(s:String) : Int = map(s) match {
+    case i:Double => i.toInt
+    case i:Int => i
+    case i:String =>
+      logger.warn("Value '" + s + "' is a string insteadof int in json file, trying auto convert")
+      i.toInt
+    case _ => throw new IllegalStateException("Value '" + s + "' is not an int")
+  }
+  /** @return the value under `s` as an Int, or `default` when the key is absent */
+  def getAsInt(s:String, default:Int) : Int = if (contains(s)) getAsInt(s) else default
+
+  /**
+   * @return the value under `s` as a Double; an Int is widened and a String is parsed
+   * @throws IllegalStateException when the value has any other type
+   */
+  def getAsDouble(s:String) : Double = map(s) match {
+    case d:Double => d
+    case d:Int => d.toDouble
+    case d:String =>
+      logger.warn("Value '" + s + "' is a string insteadof double in json file, trying auto convert")
+      d.toDouble
+    case _ => throw new IllegalStateException("Value '" + s + "' is not a double")
+  }
+  /** @return the value under `s` as a Double, or `default` when the key is absent */
+  def getAsDouble(s:String, default:Double) : Double = if (contains(s)) getAsDouble(s) else default
+
+  /**
+   * @return the value under `s` as a Boolean; a String counts as true when it contains "true",
+   *         a number when it is greater than zero
+   * @throws IllegalStateException when the value has any other type
+   */
+  def getAsBoolean(s:String) : Boolean = map(s) match {
+    case b:Boolean => b
+    case b:String =>
+      logger.warn("Value '" + s + "' is a string insteadof boolean in json file, trying auto convert")
+      b.contains("true")
+    case b:Int =>
+      logger.warn("Value '" + s + "' is a int insteadof boolean in json file, trying auto convert")
+      b > 0
+    // JSON.parseFull hands numbers back as Double by default, so numeric flags from a file land here.
+    case b:Double =>
+      logger.warn("Value '" + s + "' is a number insteadof boolean in json file, trying auto convert")
+      b > 0
+    case _ => throw new IllegalStateException("Value '" + s + "' is not an boolean")
+  }
+  /** @return the value under `s` as a Boolean, or `default` when the key is absent */
+  def getAsBoolean(s:String, default:Boolean) : Boolean = if (contains(s)) getAsBoolean(s) else default
+
+  /**
+   * @return the value under `s` as a List
+   * @throws IllegalStateException when the value is not a List
+   */
+  def getAsList(s:String) : List[Any] = map(s) match {
+    case l:List[_] => l
+    case _ => throw new IllegalStateException("Value '" + s + "' is not an List")
+  }
+  /** @return the value under `s` as a List, or `default` when the key is absent */
+  def getAsList(s:String, default:List[Any]) : List[Any] = if (contains(s)) getAsList(s) else default
+  /** @return the list under `s` with every element converted with `toString` */
+  def getAsListOfStrings(s:String) : List[String] = getAsList(s).map(_.toString)
+  /** @return the list under `s` as strings, or `default` when the key is absent */
+  def getAsListOfStrings(s:String, default:List[String]) : List[String] = if (contains(s)) getAsListOfStrings(s) else default
+
+  /**
+   * @return the value under `s` as a map with String keys
+   * @throws IllegalStateException when the value is not a Map or has a non-String key
+   */
+  def getAsMap(s:String) : Map[String,Any] = map(s) match {
+    case m:Map[_,_] => Config.valueToMap(m)
+    case _ => throw new IllegalStateException("Value '" + s + "' is not an Map")
+  }
+  /** @return the value under `s` as a map, or `default` when the key is absent */
+  def getAsMap(s:String, default:Map[String,Any]) : Map[String,Any] = if (contains(s)) getAsMap(s) else default
+
+  /** @return a new Config over the map under `s`, or over `default` when the key is absent */
+  def getAsConfig(s:String, default:Map[String,Any]) : Config = if (contains(s)) new Config(getAsMap(s)) else new Config(default)
+  /** @return a new Config over the map under `s`, or an empty Config when the key is absent */
+  def getAsConfig(s:String) : Config = if (contains(s)) new Config(getAsMap(s)) else new Config(Map())
+
+  override def toString() : String = map.toString
+}
+
+object Config {
+  /**
+   * Checks that `input` is a Map whose keys are all Strings and returns it typed as such.
+   *
+   * @throws IllegalStateException when `input` is not a Map or one of its keys is not a String
+   */
+  def valueToMap(input:Any) : Map[String,Any] = input match {
+    case m:Map[_, _] =>
+      m.foldLeft(Map[String,Any]()) {
+        case (acc, (k:String, v)) => acc + (k -> v)
+        case _ => throw new IllegalStateException("Key of map '" + m + "' is not an String")
+      }
+    case _ => throw new IllegalStateException("Value '" + input + "' is not an Map")
+  }
+
+  /**
+   * Merges two configuration maps. A key present in both keeps the value of `map1`, unless both
+   * values are maps, in which case those are merged recursively under the same rule.
+   */
+  def mergMaps(map1:Map[String,Any],map2:Map[String,Any]) : Map[String,Any] = {
+    // Start from map2 and let every map1 entry (merged where both sides are maps) override it.
+    map2 ++ map1.map { case (key, value1) =>
+      val merged: Any = (value1, map2.get(key)) match {
+        case (m1:Map[_,_], Some(m2:Map[_,_])) => mergMaps(valueToMap(m1), valueToMap(m2))
+        case _ => value1
+      }
+      key -> merged
+    }
+  }
+}
\ No newline at end of file
diff --git a/biopet-framework/src/main/java/nl/lumc/sasc/biopet/wrappers/Bwa.scala b/biopet-framework/src/main/java/nl/lumc/sasc/biopet/wrappers/Bwa.scala
new file mode 100644
index 0000000000000000000000000000000000000000..4b66ee2b5c4f3a0882e442e15b7b170dfd4ca580
--- /dev/null
+++ b/biopet-framework/src/main/java/nl/lumc/sasc/biopet/wrappers/Bwa.scala
@@ -0,0 +1,34 @@
+package nl.lumc.sasc.biopet.wrappers
+
+import nl.lumc.sasc.biopet.core._
+import org.broadinstitute.sting.queue.function.CommandLineFunction
+import org.broadinstitute.sting.commandline._
+import java.io.File
+
+class Bwa(private var globalConfig: Config) extends CommandLineFunction {
+  def this() = this(new Config(Map()))
+  this.analysisName = "bwa"
+  var config: Config
= globalConfig.getAsConfig("bwa")
+
+  @Argument(doc="Bwa executeble", shortName="Bwa_Exe")
+  var bwa_exe: String = config.getAsString("exe", "/usr/local/bin/bwa")
+  @Input(doc="The reference file for the bam files.", shortName="R") var referenceFile: File = _
+  @Input(doc="Fastq file R1", shortName="R1") var R1: File = _
+  @Input(doc="Fastq file R2", shortName="R2", required=false) var R2: File = _
+  @Output(doc="Output file SAM", shortName="output") var output: File = _
+
+  @Argument(doc="Readgroup header", shortName="RG", required=false) var RG: String = _
+  @Argument(doc="M", shortName="M", required=false) var M: Boolean = config.getAsBoolean("M", true)
+
+  def commandLine = {
+    required(bwa_exe) +
+    required("mem") +
+    optional("-t", nCoresRequest) +
+    optional("-R", RG) +
+    conditional(M, "-M") +
+    required(referenceFile) +
+    required(R1) +
+    optional(R2) +
+    " > " + required(output)
+  }
+}
\ No newline at end of file
diff --git a/biopet-framework/src/main/java/nl/lumc/sasc/biopet/wrappers/Cutadapt.scala b/biopet-framework/src/main/java/nl/lumc/sasc/biopet/wrappers/Cutadapt.scala
new file mode 100644
index 0000000000000000000000000000000000000000..dbe65357e2551bb49e883ddd124ce8c203df2e03
--- /dev/null
+++ b/biopet-framework/src/main/java/nl/lumc/sasc/biopet/wrappers/Cutadapt.scala
@@ -0,0 +1,89 @@
+package nl.lumc.sasc.biopet.wrappers
+
+import nl.lumc.sasc.biopet.core._
+import org.broadinstitute.sting.queue.function.CommandLineFunction
+import org.broadinstitute.sting.commandline._
+import java.io.File
+import scala.io.Source._
+import scala.sys.process._
+
+/**
+ * Queue wrapper for cutadapt. Adapter sequences come from the "cutadapt" section of the config
+ * and, optionally, from the last tab-separated column of each line in `contams_file`.
+ * When no adapter is known at all, the job only symlinks the input to the output.
+ */
+class Cutadapt(private var globalConfig: Config) extends CommandLineFunction {
+  def this() = this(new Config(Map()))
+  analysisName = "cutadapt"
+  var config: Config = globalConfig.getAsConfig("cutadapt")
+
+  @Input(doc="Cutadapt exe", required=false)
+  var cutadapt_exe: File = new File(config.getAsString("exe","/usr/local/bin/cutadapt"))
+  @Input(doc="Input fastq file") var fastq_input: File = _
+  @Input(doc="Fastq contams file", required=false) var contams_file: File = _
+  @Output(doc="Output fastq file") var fastq_output: File = _
+
+  var opt_adapter: Set[String] = config.getAsListOfStrings("adapter", Nil).to[Set]
+  var opt_anywhere: Set[String] = config.getAsListOfStrings("anywhere", Nil).to[Set]
+  var opt_front: Set[String] = config.getAsListOfStrings("front", Nil).to[Set]
+
+  var opt_discard: Boolean = config.getAsBoolean("discard",false)
+  var opt_minimum_length: String = config.getAsInt("minimum_length", 1).toString
+  var opt_maximum_length: String = if (config.contains("maximum_length")) config.getAsInt("maximum_length").toString else null
+
+  /** Builds the shell command; as a side effect records the tool version and reads `contams_file`. */
+  def commandLine = {
+    this.addJobReportBinding("version", getVersion)
+    this.getContamsFromFile
+    if (!opt_adapter.isEmpty || !opt_anywhere.isEmpty || !opt_front.isEmpty) {
+      required(cutadapt_exe) +
+      // options
+      repeat("-a", opt_adapter) +
+      repeat("-b", opt_anywhere) +
+      repeat("-g", opt_front) +
+      conditional(opt_discard, "--discard") +
+      optional("-m", opt_minimum_length) +
+      optional("-M", opt_maximum_length) +
+      // input / output
+      required(fastq_input) +
+      " > " + required(fastq_output)
+    } else {
+      "ln -sf " +
+      required(fastq_input) +
+      required(fastq_output)
+    }
+  }
+
+  /**
+   * Adds the last tab-separated column of every line in `contams_file` to `opt_adapter`.
+   * Does nothing when no contams file is set and only warns when the file does not exist.
+   */
+  def getContamsFromFile {
+    if (contams_file != null) {
+      if (contams_file.exists()) {
+        val source = fromFile(contams_file)
+        // Close the reader even when a line fails to process.
+        try {
+          for (line <- source.getLines) {
+            val s: String = line.substring(line.lastIndexOf("\t")+1, line.size)
+            // A blank line or trailing tab yields an empty sequence, which is not a usable adapter.
+            if (!s.isEmpty) {
+              opt_adapter += s
+              logger.info("Adapter: " + s + " found in: " + fastq_input)
+            }
+          }
+        } finally source.close()
+      } else logger.warn("File : " + contams_file + " does not exist")
+    }
+  }
+
+  private var version: String = _
+  /** @return the output of `cutadapt_exe --version` without newlines, cached after the first non-empty result */
+  def getVersion : String = {
+    if (version == null) {
+      val v: String = (cutadapt_exe + " --version").!!.replace("\n", "")
+      if (!v.isEmpty) version = v
+    }
+    version
+  }
+}
\ No newline at end of file
diff --git a/biopet-framework/src/main/java/nl/lumc/sasc/biopet/wrappers/Fastqc.scala
b/biopet-framework/src/main/java/nl/lumc/sasc/biopet/wrappers/Fastqc.scala
new file mode 100644
index 0000000000000000000000000000000000000000..55394a44c2724a68191acca5445b8578cb5d402f
--- /dev/null
+++ b/biopet-framework/src/main/java/nl/lumc/sasc/biopet/wrappers/Fastqc.scala
@@ -0,0 +1,58 @@
+package nl.lumc.sasc.biopet.wrappers
+
+import nl.lumc.sasc.biopet.core._
+import org.broadinstitute.sting.queue.function.CommandLineFunction
+import org.broadinstitute.sting.commandline._
+import java.io.File
+import scala.sys.process._
+
+/** Queue wrapper for FastQC; its defaults are read from the "fastqc" section of the config. */
+class Fastqc(private var globalConfig: Config) extends CommandLineFunction {
+  def this() = this(new Config(Map()))
+  this.analysisName = "fastqc"
+  var config: Config = globalConfig.getAsConfig("fastqc")
+
+  @Input(doc="fastqc executeble", shortName="Fastqc_Exe")
+  var fastqc_exe: File = new File(config.getAsString("exe","/usr/local/FastQC/FastQC_v0.10.1/fastqc"))
+  @Argument(doc="java vm executeble", shortName="Java_Exe", required=false)
+  var java_exe: String = globalConfig.getAsConfig("java").getAsString("exe", "java")
+  @Argument(doc="kmers", required=false) var kmers: Int = config.getAsInt("kmers", 5)
+  @Argument(doc="threads", required=false) var threads: Int = config.getAsInt("threads", 4)
+  @Argument(doc="quiet", required=false) var quiet: Boolean = config.getAsBoolean("quiet", false)
+  @Argument(doc="noextract", required=false) var noextract: Boolean = config.getAsBoolean("noextract", false)
+  @Argument(doc="nogroup", required=false) var nogroup: Boolean = config.getAsBoolean("nogroup", false)
+  @Input(doc="Contaminants", required=false)
+  var contaminants: File = new File(config.getAsString("contaminants",fastqc_exe.getParent() + "/Contaminants/contaminant_list.txt"))
+  @Input(doc="Fastq file", shortName="FQ") var fastqfile: File = _
+  @Output(doc="Output", shortName="out") var output: File = _
+
+  /** Builds the shell command; as a side effect records the tool version and requests `threads` cores. */
+  def commandLine = {
+    this.addJobReportBinding("version", getVersion)
+    if (config.contains("fastqc_exe")) fastqc_exe = new File(config.get("fastqc_exe").toString)
+    this.nCoresRequest = Option(threads)
+    required(fastqc_exe) +
+    optional("--java", java_exe) +
+    optional("--threads",threads) +
+    optional("--contaminants",contaminants) +
+    optional("--kmers",kmers) +
+    conditional(nogroup, "--nogroup") +
+    conditional(noextract, "--noextract") +
+    conditional(quiet, "--quiet") +
+    required("-o",output.getParent()) +
+    required(fastqfile) +
+    required(" > ", output, escape=false)
+  }
+
+  private var version: String = _
+  /** @return the text after "FastQC " on the first matching line of `fastqc_exe --version`, cached once found */
+  def getVersion : String = {
+    val REG = """FastQC (.*)""".r
+    if (version == null) {
+      // collectFirst stops at the first matching line without a non-local return out of a closure.
+      val lines = (fastqc_exe + " --version").!!.split("\n")
+      lines.collectFirst { case REG(m) => m }.foreach(v => version = v)
+    }
+    version
+  }
+}
\ No newline at end of file
diff --git a/biopet-framework/src/main/java/nl/lumc/sasc/biopet/wrappers/Sickle.scala b/biopet-framework/src/main/java/nl/lumc/sasc/biopet/wrappers/Sickle.scala
new file mode 100644
index 0000000000000000000000000000000000000000..facb378da55fa5c21364b9c56e4eeb1c658c71e7
--- /dev/null
+++ b/biopet-framework/src/main/java/nl/lumc/sasc/biopet/wrappers/Sickle.scala
@@ -0,0 +1,81 @@
+package nl.lumc.sasc.biopet.wrappers
+
+import nl.lumc.sasc.biopet.core._
+import org.broadinstitute.sting.queue.function.CommandLineFunction
+import org.broadinstitute.sting.commandline._
+import java.io.File
+import scala.io.Source._
+import scala.sys.process._
+
+/**
+ * Queue wrapper for sickle in single-end ("se") or, when `input_R2` is set, paired-end ("pe") mode.
+ * The quality type comes from the config, from `qualityTypeFile`, or from `defaultQualityType`.
+ */
+class Sickle(private var globalConfig: Config) extends CommandLineFunction {
+  def this() = this(new Config(Map()))
+  this.analysisName = "sickle"
+  var config: Config = globalConfig.getAsConfig("sickle")
+
+  @Input(doc="Sickle exe", required=false) var sickle_exe: File = new File(config.getAsString("exe", "/usr/local/bin/sickle"))
+  @Input(doc="R1 input") var input_R1: File = null
+  @Input(doc="R2 input", required=false) var input_R2: File = null
+  @Output(doc="R1 output") var output_R1: File = null
+  @Output(doc="R2 output", required=false) var output_R2: File = null
+  @Output(doc="singles output", required=false) var output_singles: File = null
+  @Output(doc="stats output") var output_stats: File = null
+
+  @Input(doc="qualityType file", required=false) var qualityTypeFile: File = null
+  var qualityType: String = config.getAsString("qualitytype", null)
+  var defaultQualityType: String = config.getAsString("defaultqualitytype", "sanger")
+
+  /** Builds the shell command; as a side effect records the tool version and resolves the quality type. */
+  def commandLine = {
+    this.addJobReportBinding("version", getVersion)
+    this.getQualityTypeFromFile
+    if (qualityType == null && defaultQualityType != null) qualityType = defaultQualityType
+    var cmd: String = required(sickle_exe)
+    if (input_R2 != null) {
+      cmd += required("pe") +
+      required("-r", input_R2) +
+      required("-p", output_R2) +
+      required("-s", output_singles)
+    } else cmd += required("se")
+    cmd +
+    required("-f", input_R1) +
+    required("-t", qualityType) +
+    required("-o", output_R1) +
+    " > " + required(output_stats)
+  }
+
+  /**
+   * Sets `qualityType` from `qualityTypeFile` when it was not configured explicitly; the value
+   * is the text before the last tab of the last non-empty entry in the file.
+   */
+  def getQualityTypeFromFile {
+    if (qualityType == null && qualityTypeFile != null) {
+      if (qualityTypeFile.exists()) {
+        val source = fromFile(qualityTypeFile)
+        try {
+          for (line <- source.getLines) {
+            val tab = line.lastIndexOf("\t")
+            // substring(0, -1) throws on a line without a tab, so fall back to the whole line.
+            // NOTE(review): assumes a tab-less line holds just the quality type - confirm against the producer.
+            val s: String = if (tab >= 0) line.substring(0, tab) else line
+            if (!s.isEmpty) qualityType = s
+          }
+        } finally source.close()
+      } else logger.warn("File : " + qualityTypeFile + " does not exist")
+    }
+  }
+
+  private var version: String = _
+  /** @return the text after "sickle version " on the first matching line of `sickle_exe --version`, cached once found */
+  def getVersion : String = {
+    val REG = """sickle version (.*)""".r
+    if (version == null) {
+      val lines = (sickle_exe + " --version").!!.split("\n")
+      lines.collectFirst { case REG(m) => m }.foreach(v => version = v)
+    }
+    version
+  }
+}
\ No newline at end of file
diff --git a/biopet-framework/src/main/java/nl/lumc/sasc/biopet/wrappers/Zcat.scala b/biopet-framework/src/main/java/nl/lumc/sasc/biopet/wrappers/Zcat.scala
new file mode 100644
index 0000000000000000000000000000000000000000..36e31cfbc781b06309573d81dc1846a1d39f0726
--- /dev/null
+++ b/biopet-framework/src/main/java/nl/lumc/sasc/biopet/wrappers/Zcat.scala
@@ -0,0 +1,18 @@
+package nl.lumc.sasc.biopet.wrappers
+
+import nl.lumc.sasc.biopet.core._
+import org.broadinstitute.sting.queue.function.CommandLineFunction
+import org.broadinstitute.sting.commandline._
+import java.io.File
+
+/** Queue wrapper that decompresses `in` to `out` with zcat. */
+class Zcat(private var config: Config) extends CommandLineFunction {
+  def this() = this(new Config(Map()))
+  this.analysisName = "zcat"
+
+  @Input(doc="Zipped file") var in: File = _
+  @Output(doc="Unzipped file") var out: File = _
+
+  // Build the command with required() so both paths are shell-escaped rather than pasted in verbatim.
+  def commandLine = required("zcat") + required(in) + " > " + required(out)
+}
\ No newline at end of file
diff --git a/flexiprep/.gitignore b/flexiprep/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..a6f89c2da7a029afa02b6e7a2bf80ad34958a311
--- /dev/null
+++ b/flexiprep/.gitignore
@@ -0,0 +1 @@
+/target/
\ No newline at end of file
diff --git a/flexiprep/README b/flexiprep/README
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/flexiprep/pom.xml b/flexiprep/pom.xml
new file mode 100644
index 0000000000000000000000000000000000000000..9dd241d624226d037e0e8d239ad73eef118e3528
--- /dev/null
+++ b/flexiprep/pom.xml
@@ -0,0 +1,117 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <groupId>nl.lumc.sasc</groupId>
+  <artifactId>Flexiprep</artifactId>
+  <version>0.1.0</version>
+  <packaging>jar</packaging>
+
+  <name>Flexiprep</name>
+  <url>http://maven.apache.org</url>
+
+  <parent>
+    <groupId>nl.lumc.sasc</groupId>
+    <artifactId>Biopet</artifactId>
+    <version>0.1.0</version>
+    <relativePath>../</relativePath>
+  </parent>
+
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    <sting.unpack.phase>prepare-package</sting.unpack.phase>
+    <sting.shade.phase>package</sting.shade.phase>
+
<app.main.class>org.broadinstitute.sting.queue.QCommandLine</app.main.class> + </properties> + + <dependencies> +<!-- <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>3.8.1</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.scala-lang</groupId> + <artifactId>scala-compiler</artifactId> + <version>2.11.0</version> + </dependency> + <dependency> + <groupId>org.scalatest</groupId> + <artifactId>scalatest_2.9.2</artifactId> + <version>2.0.M4</version> + <scope>test</scope> + </dependency>--> + <dependency> + <groupId>nl.lumc.sasc</groupId> + <artifactId>Biopet-Framework</artifactId> + <version>0.1.0</version> + </dependency> +<!-- <dependency> + <groupId>org.broadinstitute.sting</groupId> + <artifactId>queue-package</artifactId> + <version>3.1</version> + </dependency>--> + </dependencies> + <build> + <plugins> + <plugin> + <groupId>org.scala-tools</groupId> + <artifactId>maven-scala-plugin</artifactId> + <version>2.15.2</version> + <executions> + <execution> + <id>scala-compile</id> + <goals> + <goal>compile</goal> + <goal>testCompile</goal> + </goals> + <configuration> + <args> + <arg>-dependencyfile</arg> + <arg>${project.build.directory}/.scala_dependencies</arg> + </args> + </configuration> + </execution> + </executions> + </plugin> +<!-- <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-surefire-plugin</artifactId> + <version>2.7.2</version> + <executions> + <execution> + <id>default-test</id> + Disable the default-test by putting it in phase none + <phase>none</phase> + </execution> + </executions> + </plugin>--> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <version>2.3</version> + <configuration> + <finalName>${project.artifactId}-${project.version}</finalName> + <transformers> + <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> + <manifestEntries> + 
<Main-Class>${app.main.class}</Main-Class>
+                <X-Compile-Source-JDK>${maven.compile.source}</X-Compile-Source-JDK>
+                <X-Compile-Target-JDK>${maven.compile.target}</X-Compile-Target-JDK>
+              </manifestEntries>
+            </transformer>
+          </transformers>
+        </configuration>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+</project>
diff --git a/flexiprep/run_flexiprep.sh b/flexiprep/run_flexiprep.sh
new file mode 100755
index 0000000000000000000000000000000000000000..ca65a7d73bb3c82b1424a3b8f91c352cae4430e7
--- /dev/null
+++ b/flexiprep/run_flexiprep.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Runs the Flexiprep pipeline script with the Flexiprep jar found under target/.
+# JAVA_OPTIONS may be set in the environment to pass extra options to java.
+JAVA_EXE=java
+JAR_FILE=`dirname $0`/target/Flexiprep*.jar
+PIPELINE_SCRIPT=`dirname $0`/../flexiprep/src/main/java/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala
+
+# "$@" keeps each argument intact; a bare $@ re-splits arguments that contain spaces.
+$JAVA_EXE $JAVA_OPTIONS -jar $JAR_FILE -S $PIPELINE_SCRIPT "$@"
diff --git a/flexiprep/src/main/java/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqSync.scala b/flexiprep/src/main/java/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqSync.scala
new file mode 100644
index 0000000000000000000000000000000000000000..b33d93384817fdc1b754e50eaad11865144788e2
--- /dev/null
+++ b/flexiprep/src/main/java/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqSync.scala
@@ -0,0 +1,34 @@
+package nl.lumc.sasc.biopet.pipelines.flexiprep
+
+import nl.lumc.sasc.biopet.core._
+import org.broadinstitute.sting.queue.function.CommandLineFunction
+import org.broadinstitute.sting.commandline._
+import java.io.File
+
+class FastqSync(private var config: Config) extends CommandLineFunction {
+  def this() = this(new Config(Map()))
+  analysisName = "FastqSync"
+
+  @Argument(doc="Pyhton exe", shortName="script") var python_exe: String = "python"
+  @Input(doc="Pyhton script", shortName="script")
+  var python_script: File = new File("/home/jfjlaros/projects/ngs-misc/trunk/src/sync_paired_end_reads.py")
+
+  @Input(doc="Start fastq") var input_start_fastq:
File = _ + @Input(doc="R1 input") var input_R1: File = _ + @Input(doc="R2 input") var input_R2: File = _ + @Output(doc="R1 output") var output_R1: File = _ + @Output(doc="R2 output") var output_R2: File = _ + @Output(doc="stats output") var output_stats: File = _ + + def commandLine = { + required(python_exe) + + required(python_script) + + required(input_start_fastq) + + required(input_R1) + + required(input_R2) + + required(output_R1) + + required(output_R2) + + " > " + + required(output_stats) + } +} \ No newline at end of file diff --git a/flexiprep/src/main/java/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcToContams.scala b/flexiprep/src/main/java/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcToContams.scala new file mode 100644 index 0000000000000000000000000000000000000000..a17196301df1c7fef949c1e543df2e253cbe010a --- /dev/null +++ b/flexiprep/src/main/java/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcToContams.scala @@ -0,0 +1,28 @@ +package nl.lumc.sasc.biopet.pipelines.flexiprep + +import nl.lumc.sasc.biopet.core._ +import org.broadinstitute.sting.queue.function.CommandLineFunction +import org.broadinstitute.sting.commandline._ +import java.io.File + +class FastqcToContams(private var config: Config) extends CommandLineFunction { + def this() = this(new Config(Map())) + analysisName = "getContams" + + @Argument(doc="Pyhton exe", shortName="script") var python_exe: String = "python" + @Input(doc="Pyhton script", shortName="script") + var python_script: File = new File("/data/DIV5/SASC/project-057-Florentine/analysis/pipelines/magpie/modules/gatk01/modules/flexiprep/scripts/fastqc_contam.py") + @Input(doc="Fastqc output", shortName="fastqc", required=true) var fastqc_output: File = _ + @Input(doc="Contams input", shortName="fastqc", required=true) var contams_file: File = _ + @Output(doc="Output file", shortName="out", required=true) var out: File = _ + + + def commandLine = { + required(python_exe) + + required(python_script) + + 
required(fastqc_output.getParent()) + + required("-c",contams_file) + + " > " + + required(out) + } +} \ No newline at end of file diff --git a/flexiprep/src/main/java/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcToQualtype.scala b/flexiprep/src/main/java/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcToQualtype.scala new file mode 100644 index 0000000000000000000000000000000000000000..6227a49e6ed14740683ecaecf2044e58730eb63b --- /dev/null +++ b/flexiprep/src/main/java/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcToQualtype.scala @@ -0,0 +1,26 @@ +package nl.lumc.sasc.biopet.pipelines.flexiprep + +import nl.lumc.sasc.biopet.core._ +import org.broadinstitute.sting.queue.function.CommandLineFunction +import org.broadinstitute.sting.commandline._ +import java.io.File + +class FastqcToQualtype(private var config: Config) extends CommandLineFunction { + def this() = this(new Config(Map())) + analysisName = "getQualtype" + + @Argument(doc="Pyhton exe", shortName="script") var python_exe: String = "python" + @Input(doc="Pyhton script", shortName="script") + var python_script: File = new File("/data/DIV5/SASC/project-057-Florentine/analysis/pipelines/magpie/modules/gatk01/modules/flexiprep/scripts/qual_type_sickle.py") + @Input(doc="Fastqc output", shortName="fastqc", required=true) var fastqc_output: File = _ + @Output(doc="Output file", shortName="out", required=true) var out: File = _ + + + def commandLine = { + required(python_exe) + + required(python_script) + + required(fastqc_output.getParent()) + + " > " + + required(out) + } +} \ No newline at end of file diff --git a/flexiprep/src/main/java/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala b/flexiprep/src/main/java/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala new file mode 100644 index 0000000000000000000000000000000000000000..908361d7b0f90a092a54a809b2825590f6dc3283 --- /dev/null +++ b/flexiprep/src/main/java/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala @@ -0,0 +1,173 @@ +package 
nl.lumc.sasc.biopet.pipelines.flexiprep + +import nl.lumc.sasc.biopet.core._ +import nl.lumc.sasc.biopet.wrappers._ +import org.broadinstitute.sting.queue.QScript +import org.broadinstitute.sting.queue.extensions.gatk._ +import org.broadinstitute.sting.queue.extensions.picard._ +import org.broadinstitute.sting.queue.function._ +import scala.util.parsing.json._ +import org.broadinstitute.sting.commandline._ + +class Flexiprep(private var config: Config) extends QScript { + def this() = this(new Config()) + + @Argument(doc="Config Json file",shortName="config", required=false) var configfiles: List[File] = Nil + @Input(doc="R1 fastq file", shortName="R1",required=true) var input_R1: File = _ + @Input(doc="R2 fastq file", shortName="R2", required=false) var input_R2: File = _ + @Argument(doc="Output directory", shortName="outputDir", required=true) var outputDir: String = _ + @Argument(doc="Skip Trim fastq files", shortName="skiptrim", required=false) var skipTrim: Boolean = false + @Argument(doc="Skip Clip fastq files", shortName="skipclip", required=false) var skipClip: Boolean = false + + var outputFiles:Map[String,File] = Map() + var paired: Boolean = (input_R2 != null) + + def script() { + for (file <- configfiles) config.loadConfigFile(file) + if (input_R1 == null) throw new IllegalStateException("Missing R1 on flexiprep module") + if (outputDir == null) throw new IllegalStateException("Missing Output directory on flexiprep module") + else if (!outputDir.endsWith("/")) outputDir += "/" + paired = (input_R2 != null) + + runInitialFastqc() + + outputFiles += ("output_R1" -> zcatIfNeeded(input_R1,outputDir)) + if (paired) outputFiles += ("output_R2" -> zcatIfNeeded(input_R2,outputDir)) + + var results: Map[String,File] = Map() + if (paired) { + results = runTrimClip(outputFiles("output_R1"), outputFiles("output_R2"), outputDir) + outputFiles += ("output_R1" -> results("output_R1")) + outputFiles += ("output_R2" -> results("output_R2")) + } else { + results = 
/**
 * Schedules FastQC on the raw input read file(s).
 *
 * For each read the FastQC output, the derived quality-type file and the derived
 * contaminants file are stored in `outputFiles` under `fastqc_R*`, `qualtype_R*`
 * and `contams_R*`. The R2 entries are only produced when `paired` is set.
 */
def runInitialFastqc(): Unit = {
  val fastqcR1 = runFastqc(input_R1, outputDir + "/fastqc_R1/")
  outputFiles += ("fastqc_R1" -> fastqcR1.output)
  outputFiles += ("qualtype_R1" -> getQualtype(fastqcR1))
  outputFiles += ("contams_R1" -> getContams(fastqcR1))

  if (paired) {
    val fastqcR2 = runFastqc(input_R2, outputDir + "/fastqc_R2/")
    outputFiles += ("fastqc_R2" -> fastqcR2.output)
    outputFiles += ("qualtype_R2" -> getQualtype(fastqcR2))
    outputFiles += ("contams_R2" -> getContams(fastqcR2))
  }
}

/**
 * Adds a FastqcToQualtype job that reads the output of the given FastQC job.
 *
 * @param fastqc the FastQC job whose `output` is used as input
 * @return the file the job writes: `swapExt` applied to the FastQC input file with
 *         an empty old extension and `.qualtype.txt` as the new one, inside `outputDir`
 */
def getQualtype(fastqc: Fastqc): File = {
  val fastqcToQualtype = new FastqcToQualtype(config)
  fastqcToQualtype.fastqc_output = fastqc.output
  val out: File = swapExt(outputDir, fastqc.fastqfile, "", ".qualtype.txt")
  fastqcToQualtype.out = out
  add(fastqcToQualtype)
  out
}

/**
 * Adds a FastqcToContams job that reads the output of the given FastQC job and
 * the contaminants file configured on that same job.
 *
 * @param fastqc the FastQC job whose `output` and `contaminants` are used as input
 * @return the file the job writes: `swapExt` applied to the FastQC input file with
 *         an empty old extension and `.contams.txt` as the new one, inside `outputDir`
 */
def getContams(fastqc: Fastqc): File = {
  val fastqcToContams = new FastqcToContams(config)
  fastqcToContams.fastqc_output = fastqc.output
  val out: File = swapExt(outputDir, fastqc.fastqfile, "", ".contams.txt")
  fastqcToContams.out = out
  fastqcToContams.contams_file = fastqc.contaminants
  add(fastqcToContams)
  out
}

/**
 * Returns the extension of `file`'s name including the leading dot (everything
 * from the last '.' onward), or the empty string when the name contains no dot.
 */
private def extensionOf(file: File): String = {
  val name = file.getName
  val dot = name.lastIndexOf(".")
  // lastIndexOf yields -1 for dot-less names; substring(-1) would throw.
  if (dot < 0) "" else name.substring(dot)
}

/** Single-read convenience overload: forwards with an empty-path placeholder for R2. */
def runTrimClip(R1_in: File, outDir: String): Map[String, File] =
  runTrimClip(R1_in, new File(""), outDir)

/**
 * Adds the adapter-clipping and quality-trimming jobs for one read file or read pair.
 *
 * Clipping (unless `skipClip`): one Cutadapt job per read; for paired data a
 * FastqSync job follows, fed with the original R1 and both clipped files.
 * Trimming (unless `skipTrim`): a single Sickle job handling R1 and, when
 * `paired`, R2. Every stage consumes the files produced by the previous stage.
 *
 * @param R1_in  first read file
 * @param R2_in  second read file; only used when `paired` is set
 * @param outDir directory passed to `swapExt` for every generated file
 * @return map with `output_R1` (and `output_R2` when `paired`) pointing at the
 *         files of the last stage that ran, or at the inputs if both stages are skipped
 */
def runTrimClip(R1_in: File, R2_in: File, outDir: String): Map[String, File] = {
  // Reassigned after every stage so the next stage picks up the newest files.
  var r1: File = new File(R1_in)
  var r2: File = new File(R2_in)
  val r1Ext: String = extensionOf(r1)
  val r2Ext: String = if (paired) extensionOf(r2) else ""

  if (!skipClip) { // Adapter clipping
    val cutadaptR1 = new Cutadapt(config)
    cutadaptR1.fastq_input = r1
    cutadaptR1.fastq_output = swapExt(outDir, r1, r1Ext, ".clip" + r1Ext)
    if (outputFiles.contains("contams_R1")) cutadaptR1.contams_file = outputFiles("contams_R1")
    add(cutadaptR1)
    r1 = cutadaptR1.fastq_output

    if (paired) {
      val cutadaptR2 = new Cutadapt(config)
      cutadaptR2.fastq_input = r2
      cutadaptR2.fastq_output = swapExt(outDir, r2, r2Ext, ".clip" + r2Ext)
      if (outputFiles.contains("contams_R2")) cutadaptR2.contams_file = outputFiles("contams_R2")
      add(cutadaptR2)
      r2 = cutadaptR2.fastq_output

      val fastqSync = new FastqSync(config)
      fastqSync.input_start_fastq = cutadaptR1.fastq_input
      fastqSync.input_R1 = cutadaptR1.fastq_output
      fastqSync.input_R2 = cutadaptR2.fastq_output
      fastqSync.output_R1 = swapExt(outDir, r1, ".clip" + r1Ext, ".clipsync" + r1Ext)
      fastqSync.output_R2 = swapExt(outDir, r2, ".clip" + r2Ext, ".clipsync" + r2Ext)
      fastqSync.output_stats = swapExt(outDir, r1, ".clip" + r1Ext, ".clipsync.stats")
      add(fastqSync)
      r1 = fastqSync.output_R1
      r2 = fastqSync.output_R2
    }
  }

  if (!skipTrim) { // Quality trimming
    val sickle = new Sickle(config)
    sickle.input_R1 = r1
    sickle.output_R1 = swapExt(outDir, r1, r1Ext, ".trim" + r1Ext)
    if (outputFiles.contains("qualtype_R1")) sickle.qualityTypeFile = outputFiles("qualtype_R1")
    if (paired) {
      sickle.input_R2 = r2
      sickle.output_R2 = swapExt(outDir, r2, r2Ext, ".trim" + r2Ext)
      // NOTE(review): the singles file is named after R2 but ends in R1's extension;
      // kept exactly as before so existing output names do not change.
      sickle.output_singles = swapExt(outDir, r2, r2Ext, ".trim.singles" + r1Ext)
    }
    sickle.output_stats = swapExt(outDir, r1, r1Ext, ".trim.stats")
    add(sickle)
    r1 = sickle.output_R1
    if (paired) r2 = sickle.output_R2
  }

  val results: Map[String, File] = Map("output_R1" -> r1)
  if (paired) results + ("output_R2" -> r2) else results
}

/**
 * Schedules FastQC on the processed reads stored under `output_R1` / `output_R2`
 * in `outputFiles` and records the results as `fastqc_R1_final` / `fastqc_R2_final`.
 * Nothing is scheduled when both clipping and trimming are skipped.
 */
def runFinalFastqc(): Unit = {
  if (!skipTrim || !skipClip) {
    val finalR1 = runFastqc(outputFiles("output_R1"), outputDir + "/fastqc_qc_R1/")
    outputFiles += ("fastqc_R1_final" -> finalR1.output)
    if (paired) {
      val finalR2 = runFastqc(outputFiles("output_R2"), outputDir + "/fastqc_qc_R2/")
      outputFiles += ("fastqc_R2_final" -> finalR2.output)
    }
  }
}

/**
 * Creates and adds a FastQC job for one fastq file.
 *
 * The job's `output` is `outDir + "/" + <base name> + "_fastqc.ouput"`, where the
 * base name is the file name with a trailing `.gz`, then `.gzip`, then `.fastq`
 * removed (in that order). A `.fq` suffix is not stripped; that rule was left
 * disabled in the original code.
 *
 * @param fastqfile fastq file to analyse
 * @param outDir    directory prefix for the job's output path
 * @return the configured FastQC job, already added to the pipeline
 */
def runFastqc(fastqfile: File, outDir: String): Fastqc = {
  val fastqcCommand = new Fastqc(config)
  fastqcCommand.fastqfile = fastqfile
  val baseName: String =
    fastqfile.getName.stripSuffix(".gz").stripSuffix(".gzip").stripSuffix(".fastq")
  // NOTE(review): the "ouput" spelling is reproduced unchanged; other code may look for this exact path.
  fastqcCommand.output = outDir + "/" + baseName + "_fastqc.ouput"
  add(fastqcCommand)
  fastqcCommand
}

/**
 * Adds a Zcat job for files whose name ends in `.gz` or `.gzip`; any other file
 * is returned untouched and no job is added.
 *
 * @param file   possibly compressed input file
 * @param runDir prefix that is string-concatenated with the result of `swapExt`
 *               to form the decompressed file's path
 * @return the Zcat job's output file, or `file` itself when it is not compressed
 */
def zcatIfNeeded(file: File, runDir: String): File = {
  val name = file.getName
  if (name.endsWith(".gz") || name.endsWith(".gzip")) {
    val suffix = if (name.endsWith(".gzip")) ".gzip" else ".gz"
    val newFile: File = swapExt(file, suffix, "")
    val zcatCommand = new Zcat(config)
    zcatCommand.in = file
    zcatCommand.out = new File(runDir + newFile)
    add(zcatCommand)
    zcatCommand.out
  } else file
}
<version>0.1.0</version> + <packaging>jar</packaging> + + <name>Gatk</name> + <url>http://maven.apache.org</url> + + <parent> + <groupId>nl.lumc.sasc</groupId> + <artifactId>Biopet</artifactId> + <version>0.1.0</version> + <relativePath>../</relativePath> + </parent> + + <properties> + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> + <sting.unpack.phase>prepare-package</sting.unpack.phase> + <sting.shade.phase>package</sting.shade.phase> + <app.main.class>org.broadinstitute.sting.queue.QCommandLine</app.main.class> + </properties> + + <dependencies> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>3.8.1</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.scala-lang</groupId> + <artifactId>scala-compiler</artifactId> + <version>2.11.0</version> + </dependency> + <dependency> + <groupId>org.scalatest</groupId> + <artifactId>scalatest_2.9.2</artifactId> + <version>2.0.M4</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>nl.lumc.sasc</groupId> + <artifactId>Biopet-Framework</artifactId> + <version>0.1.0</version> + </dependency> + <dependency> + <groupId>nl.lumc.sasc</groupId> + <artifactId>Flexiprep</artifactId> + <version>0.1.0</version> + </dependency> + <dependency> + <groupId>org.broadinstitute.sting</groupId> + <artifactId>queue-package</artifactId> + <version>3.1</version> + </dependency> + </dependencies> + <build> + <plugins> + <plugin> + <groupId>org.scala-tools</groupId> + <artifactId>maven-scala-plugin</artifactId> + <version>2.15.2</version> + <executions> + <execution> + <id>scala-compile</id> + <goals> + <goal>compile</goal> + <goal>testCompile</goal> + </goals> + <configuration> + <args> + <arg>-dependencyfile</arg> + <arg>${project.build.directory}/.scala_dependencies</arg> + </args> + </configuration> + </execution> + </executions> + </plugin> +<!-- <plugin> + <groupId>org.apache.maven.plugins</groupId> + 
#!/bin/bash
# Runs the Gatk pipeline script with the jar matching target/Gatk*.jar next to
# this script. Every command-line argument is forwarded to the Java process.
# Extra JVM flags can be supplied through the JAVA_OPTIONS environment variable.

JAVA_EXE=java
# NOTE(review): QUEUE_JAR is not referenced by the command below; kept in case
# something else reads it - confirm before removing.
QUEUE_JAR=/data/DIV5/SASC/common/programs/Queue-3.1-1/Queue.jar
SCRIPT_DIR=$(dirname "$0")
# Stored as an unexpanded pattern; the shell expands it where $JAR_FILE is used unquoted.
JAR_FILE=$SCRIPT_DIR/target/Gatk*.jar
PIPELINE_SCRIPT=$SCRIPT_DIR/src/main/java/nl/lumc/sasc/biopet/pipelines/gatk/Gatk.scala

# $JAVA_OPTIONS and $JAR_FILE stay unquoted on purpose: the first may hold several
# flags, the second must glob-expand. "$@" is quoted so arguments containing
# spaces or glob characters reach the pipeline unchanged.
$JAVA_EXE -Xmx5g $JAVA_OPTIONS -jar $JAR_FILE -S "$PIPELINE_SCRIPT" "$@"
/**
 * Queue script that generates GATK variant-calling jobs from JSON configuration.
 *
 * Per run: Flexiprep, Bwa, SortSam, MarkDuplicates, RealignerTargetCreator,
 * IndelRealigner, BaseRecalibrator and PrintReads. Per sample: HaplotypeCaller in
 * GVCF mode over all final BAMs of that sample. Across samples (only when no single
 * sample was selected): GenotypeGVCFs writing `final.vcf`.
 *
 * @param globalConfig root configuration; the files given with `-config` are loaded into it
 */
class Gatk(private var globalConfig: Config) extends QScript {
  @Argument(doc="Config Json file",shortName="config") var configfiles: List[File] = Nil
  @Argument(doc="Only Sample",shortName="sample", required=false) var onlySample: String = _
  def this() = this(new Config())

  // Populated by init() from the "gatk" section of the global config.
  var config: Config = _
  var scatterCount: Int = _
  var referenceFile: File = _
  var dbsnp: File = _
  var gvcfFiles: List[File] = Nil

  /** Settings shared by every GATK walker created in this script. */
  trait gatkArguments extends CommandLineGATK {
    this.reference_sequence = referenceFile
    this.memoryLimit = 2
    this.jobResourceRequests :+= "h_vmem=4G"
  }

  /** Loads all config files and reads the pipeline-wide settings from the "gatk" section. */
  def init(): Unit = {
    for (file <- configfiles) globalConfig.loadConfigFile(file)
    config = globalConfig.getAsConfig("gatk")
    referenceFile = config.getAsString("referenceFile")
    dbsnp = config.getAsString("dbsnp")
    gvcfFiles = config.getAsListOfStrings("gvcfFiles", Nil)
    scatterCount = config.getAsInt("scatterCount", 1)
  }

  /**
   * Entry point: generates the jobs for every sample under "Samples" (or only the
   * one named by `onlySample`) and, when no single sample was selected, a joint
   * GenotypeGVCFs job over the configured plus the newly produced gVCF files.
   */
  def script(): Unit = {
    this.init()
    if (globalConfig.contains("Samples")) {
      for ((key, _) <- globalConfig.getAsMap("Samples")) {
        if (onlySample == null || onlySample == key) {
          val sample: Config = globalConfig.getAsConfig("Samples").getAsConfig(key)
          if (sample.getAsString("ID") == key) {
            val files: Map[String, List[File]] = sampleJobs(sample)
            gvcfFiles ++= files.getOrElse("gvcf", Nil)
          } else logger.warn("Key is not the same as ID on value for sample")
        } else logger.info("Skipping Sample: " + key)
      }
    } else logger.warn("No Samples found in config")

    if (onlySample == null) {
      // Sample-wide jobs
      val genotypeGVCFs = new GenotypeGVCFs() with gatkArguments
      genotypeGVCFs.variant = gvcfFiles
      genotypeGVCFs.scatterCount = scatterCount
      genotypeGVCFs.out = new File("final.vcf")
      if (genotypeGVCFs.variant.nonEmpty) add(genotypeGVCFs)
      else logger.warn("No gVCFs to genotype")
    }
  }

  /**
   * Called for each sample: generates the jobs of all its runs and one
   * HaplotypeCaller job over the resulting final BAM files.
   *
   * @param sampleConfig configuration of one sample; must contain "ID", and "Runs" is read from it
   * @return map that always holds "FinalBams" (empty when the sample has no ID) and
   *         holds "gvcf" when a HaplotypeCaller job was added
   */
  def sampleJobs(sampleConfig: Config): Map[String, List[File]] = {
    var outputFiles: Map[String, List[File]] = Map("FinalBams" -> List[File]())
    if (sampleConfig.contains("ID")) {
      val sampleID: String = sampleConfig.getAsString("ID")
      this.logger.info("Starting generate jobs for sample: " + sampleID)

      // Local accumulator: runJobs has side effects (adds jobs, logs), so a plain loop keeps the order explicit.
      var finalBams: List[File] = List()
      for (key <- sampleConfig.getAsMap("Runs").keySet) {
        val runConfig = sampleConfig.getAsConfig("Runs").getAsConfig(key)
        val run: Map[String, File] = runJobs(runConfig, sampleConfig)
        if (run.contains("FinalBam")) finalBams :+= run("FinalBam")
        else logger.warn("No Final bam for Sample: " + sampleID + " Run: " + runConfig)
      }
      outputFiles += ("FinalBams" -> finalBams)

      // Variant calling
      val haplotypeCaller = new HaplotypeCaller with gatkArguments
      if (scatterCount > 1) haplotypeCaller.scatterCount = scatterCount * 15
      haplotypeCaller.input_file = finalBams
      haplotypeCaller.out = new File(sampleID + "/" + sampleID + ".gvcf.vcf")
      if (dbsnp != null) haplotypeCaller.dbsnp = dbsnp
      haplotypeCaller.nct = 3
      haplotypeCaller.memoryLimit = haplotypeCaller.nct * 2

      // GVCF options
      haplotypeCaller.emitRefConfidence = org.broadinstitute.sting.gatk.walkers.haplotypecaller.HaplotypeCaller.ReferenceConfidenceMode.GVCF
      haplotypeCaller.variant_index_type = GATKVCFIndexType.LINEAR
      haplotypeCaller.variant_index_parameter = 128000

      if (haplotypeCaller.input_file.nonEmpty) {
        add(haplotypeCaller)
        outputFiles += ("gvcf" -> List(haplotypeCaller.out))
      }
    } else {
      this.logger.warn("Sample in config missing ID, skipping sample")
    }
    outputFiles
  }

  /**
   * Called for each run of a sample: generates the jobs from raw fastq to a
   * recalibrated BAM file.
   *
   * @param runConfig    configuration of the run; "R1" and "ID" are required, "R2" marks the run as paired
   * @param sampleConfig configuration of the owning sample; its "ID" is used in paths and the read group
   * @return map with "FinalBam" pointing at the PrintReads output, or an empty map
   *         (after logging an error) when the run has no "R1"
   * @throws IllegalStateException when the run has an "R1" but no "ID"
   */
  def runJobs(runConfig: Config, sampleConfig: Config): Map[String, File] = {
    val sampleID: String = sampleConfig.get("ID").toString
    if (runConfig.contains("R1")) {
      val fastq_R1: String = runConfig.get("R1").toString
      val paired: Boolean = runConfig.contains("R2")
      val fastq_R2: String = if (paired) runConfig.get("R2").toString else ""
      val runID: String =
        if (runConfig.contains("ID")) runConfig.get("ID").toString
        else throw new IllegalStateException("Missing ID on run for sample: " + sampleID)
      val runDir: String = sampleID + "/run_" + runID + "/"

      // Read QC / clipping / trimming: the nested script's jobs are copied into this one.
      val flexiprep = new Flexiprep(config)
      flexiprep.input_R1 = fastq_R1
      if (paired) flexiprep.input_R2 = fastq_R2
      flexiprep.outputDir = runDir + "flexiprep/"
      flexiprep.script
      addAll(flexiprep.functions)

      // Alignment
      val bwaCommand = new Bwa(config)
      bwaCommand.R1 = flexiprep.outputFiles("output_R1")
      if (paired) bwaCommand.R2 = flexiprep.outputFiles("output_R2")
      bwaCommand.referenceFile = referenceFile
      bwaCommand.nCoresRequest = 8
      bwaCommand.jobResourceRequests :+= "h_vmem=6G"
      bwaCommand.RG = "@RG\\t" +
        "ID:" + sampleID + "_" + runID + "\\t" +
        "LB:" + sampleID + "_" + runID + "\\t" +
        "PL:illumina\\t" +
        "CN:SASC\\t" +
        "SM:" + sampleID + "\\t" +
        "PU:na"
      bwaCommand.output = new File(runDir + sampleID + "-run_" + runID + ".sam")
      add(bwaCommand)

      // Sorting and indexing
      val sortSam = new SortSam
      sortSam.input :+= bwaCommand.output
      sortSam.createIndex = true
      sortSam.output = swapExt(runDir, bwaCommand.output, ".sam", ".bam")
      sortSam.memoryLimit = 2
      sortSam.nCoresRequest = 2
      sortSam.jobResourceRequests :+= "h_vmem=4G"
      add(sortSam)

      // Duplicate marking (duplicates are flagged, not removed)
      val markDuplicates = new MarkDuplicates
      markDuplicates.input :+= sortSam.output
      markDuplicates.output = swapExt(runDir, sortSam.output, ".bam", ".dedup.bam")
      markDuplicates.REMOVE_DUPLICATES = false
      markDuplicates.metrics = swapExt(runDir, markDuplicates.output, ".bam", ".metrics")
      markDuplicates.outputIndex = swapExt(runDir, markDuplicates.output, ".bam", ".bai")
      markDuplicates.memoryLimit = 2
      markDuplicates.jobResourceRequests :+= "h_vmem=4G"
      add(markDuplicates)

      // Indel realignment: target creation, then realignment
      val realignerTargetCreator = new RealignerTargetCreator with gatkArguments
      realignerTargetCreator.I :+= markDuplicates.output
      realignerTargetCreator.o = swapExt(runDir, markDuplicates.output, ".bam", ".realign.intervals")
      //realignerTargetCreator.nt = 1
      realignerTargetCreator.jobResourceRequests :+= "h_vmem=5G"
      if (scatterCount > 1) realignerTargetCreator.scatterCount = scatterCount
      add(realignerTargetCreator)

      val indelRealigner = new IndelRealigner with gatkArguments
      indelRealigner.I :+= markDuplicates.output
      indelRealigner.targetIntervals = realignerTargetCreator.o
      indelRealigner.o = swapExt(runDir, markDuplicates.output, ".bam", ".realign.bam")
      if (scatterCount > 1) indelRealigner.scatterCount = scatterCount
      add(indelRealigner)

      // Base quality score recalibration: model building, then applying it
      val baseRecalibrator = new BaseRecalibrator with gatkArguments
      baseRecalibrator.I :+= indelRealigner.o
      baseRecalibrator.o = swapExt(runDir, indelRealigner.o, ".bam", ".baserecal")
      // Same null guard as the HaplotypeCaller setup, so no null entry ends up in knownSites.
      if (dbsnp != null) baseRecalibrator.knownSites :+= dbsnp
      else logger.warn("No dbsnp configured, BaseRecalibrator gets no known sites for sample: " + sampleID + " run: " + runID)
      if (scatterCount > 1) baseRecalibrator.scatterCount = scatterCount
      baseRecalibrator.nct = 2
      add(baseRecalibrator)

      val printReads = new PrintReads with gatkArguments
      printReads.I :+= indelRealigner.o
      printReads.o = swapExt(runDir, indelRealigner.o, ".bam", ".baserecal.bam")
      printReads.BQSR = baseRecalibrator.o
      if (scatterCount > 1) printReads.scatterCount = scatterCount
      add(printReads)

      Map("FinalBam" -> printReads.o)
    } else {
      this.logger.error("Sample: " + sampleID + ": No R1 found for runs: " + runConfig)
      Map()
    }
  }
}