Commit 0efd042b authored by Sander Bollen's avatar Sander Bollen

Merge branch 'fix-BIOPET-425' into 'develop'

Fix biopet 425

Fixes BIOPET-425 and BIOPET-426

See merge request !483
parents 1337c894 643b1d89
......@@ -68,8 +68,6 @@ object Bam2Wig extends PipelineCommand {
val bamToBigWig = new Bam2Wig(root)
bamToBigWig.outputDir = bamFile.getParentFile
bamToBigWig.bamFile = bamFile
bamToBigWig.init()
bamToBigWig.biopetScript()
bamToBigWig
}
}
\ No newline at end of file
......@@ -114,6 +114,7 @@ object BamMetricsTest {
copyFile("ref.fa.fai")
val executables = Map(
"skip_write_dependencies" -> true,
"refFlat" -> "bla.refFlat",
"reference_fasta" -> (outputDir + File.separator + "ref.fa"),
"samtools" -> Map("exe" -> "test"),
......
......@@ -144,6 +144,7 @@ object BastyTest {
copyFile("ref.fa.fai")
val config = Map(
"skip_write_dependencies" -> true,
"name_prefix" -> "test",
"cache" -> true,
"dir" -> "test",
......
......@@ -36,6 +36,8 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
var executable: String = _
var mainFunction = true
/** This is the default shell for drmaa jobs */
def defaultRemoteCommand = "bash"
private val remoteCommand: String = config("remote_command", default = defaultRemoteCommand)
......
......@@ -16,7 +16,7 @@ package nl.lumc.sasc.biopet.core
import java.io.File
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.summary.{ SummaryQScript, WriteSummary }
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
import nl.lumc.sasc.biopet.utils.Logging
......@@ -63,6 +63,8 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
/** Returns the extension to make the report */
def reportClass: Option[ReportBuilderExtension] = None
val skipWriteDependencies: Boolean = config("skip_write_dependencies", default = false)
/** Script from queue itself, final to force some checks for each pipeline and write report */
final def script() {
outputDir = config("output_dir")
......@@ -85,7 +87,9 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
this match {
case q: MultiSampleQScript if q.onlySamples.nonEmpty && !q.samples.forall(x => q.onlySamples.contains(x._1)) =>
logger.info("Write report is skipped because sample flag is used")
case _ => reportClass.foreach(add(_))
case _ => reportClass.foreach { report =>
add(report)
}
}
logger.info("Running pre commands")
......@@ -95,7 +99,8 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
f.beforeGraph()
f.internalBeforeGraph()
f.commandLine
case _ =>
case f: WriteSummary => f.init()
case _ =>
}
if (outputDir.getParentFile.canWrite || (outputDir.exists && outputDir.canWrite))
......@@ -118,7 +123,7 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
}
})
if (logger.isDebugEnabled) WriteDependencies.writeDependencies(functions, new File(outputDir, s".log/${qSettings.runName}.deps.json"))
if (!skipWriteDependencies) WriteDependencies.writeDependencies(functions, new File(outputDir, ".log"), qSettings.runName)
Logging.checkErrors()
logger.info("Script complete without errors")
......
......@@ -16,9 +16,11 @@ package nl.lumc.sasc.biopet.core
import java.io.{ File, PrintWriter }
import nl.lumc.sasc.biopet.core.summary.WriteSummary
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.{ Logging, ConfigUtils }
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, Logging }
import org.broadinstitute.gatk.queue.function.{ CommandLineFunction, QFunction }
import scala.collection.mutable
import scala.collection.mutable.ListBuffer
......@@ -37,7 +39,7 @@ object WriteDependencies extends Logging with Configurable {
case f => f.getClass.getSimpleName
}
cache += baseName -> (cache.getOrElse(baseName, 0) + 1)
function -> s"$baseName-${cache(baseName)}"
function -> s"${baseName.replaceAll("-", "_")}_${cache(baseName)}"
}).toMap
}
......@@ -45,9 +47,10 @@ object WriteDependencies extends Logging with Configurable {
* This method will generate a json file where information about job and file dependencies are stored
*
* @param functions This should be all functions that are given to the graph of Queue
* @param outputFile Json file to write dependencies to
* @param outputDir
* @param prefix prefix
*/
def writeDependencies(functions: Seq[QFunction], outputFile: File): Unit = {
def writeDependencies(functions: Seq[QFunction], outputDir: File, prefix: String): Unit = {
logger.info("Start calculating dependencies")
val errorOnMissingInput: Boolean = config("error_on_missing_input", false)
......@@ -107,27 +110,89 @@ object WriteDependencies extends Logging with Configurable {
val jobs = functionNames.par.map {
case (f, name) =>
name -> Map("command" -> (f match {
name.toString -> Map("command" -> (f match {
case cmd: CommandLineFunction => cmd.commandLine
case _ => None
}), "main_job" -> (f match {
case cmd: BiopetCommandLineFunction => cmd.mainFunction
case s: WriteSummary if s.qscript.root == null => true
case _ => false
}), "intermediate" -> f.isIntermediate,
"depends_on_intermediate" -> f.inputs.exists(files(_).isIntermediate),
"depends_on_jobs" -> f.inputs.toList.flatMap(files(_).outputJobNames).distinct,
"output_used_by_jobs" -> outputFiles(f).toList.flatMap(files(_).inputJobNames).distinct,
"outputs" -> outputFiles(f).toList,
"inputs" -> f.inputs.toList,
"done_files" -> f.doneOutputs.toList,
"fail_files" -> f.failOutputs.toList,
"stdout_file" -> f.jobOutputFile,
"done_at_start" -> f.isDone,
"fail_at_start" -> f.isFail)
}.toIterator.toMap
val outputFile = new File(outputDir, s"$prefix.deps.json")
logger.info(s"Writing dependencies to: $outputFile")
val writer = new PrintWriter(outputFile)
writer.println(ConfigUtils.mapToJson(Map(
"jobs" -> jobs.toMap,
"jobs" -> jobs,
"files" -> files.values.par.map(_.getMap).toList
)).spaces2)
writer.close()
val jobsDeps = jobs.map(x => x._1 -> (x._2("depends_on_jobs") match {
case l: List[_] => l.map(_.toString)
case _ => throw new IllegalStateException("Value 'depends_on_jobs' is not a list")
}))
val jobsWriter = new PrintWriter(new File(outputDir, s"$prefix.jobs.json"))
jobsWriter.println(ConfigUtils.mapToJson(jobsDeps).spaces2)
jobsWriter.close()
writeGraphvizFile(jobsDeps, new File(outputDir, s"$prefix.jobs.gv"))
writeGraphvizFile(compressOnType(jobsDeps), new File(outputDir, s"$prefix.compress.jobs.gv"))
val mainJobs = jobs.filter(_._2("main_job") == true).map {
case (name, job) =>
name -> getMainDependencies(name, jobs)
}
val mainJobsWriter = new PrintWriter(new File(outputDir, s"$prefix.main_jobs.json"))
mainJobsWriter.println(ConfigUtils.mapToJson(mainJobs).spaces2)
mainJobsWriter.close()
writeGraphvizFile(mainJobs, new File(outputDir, s"$prefix.main_jobs.gv"))
writeGraphvizFile(compressOnType(mainJobs), new File(outputDir, s"$prefix.compress.main_jobs.gv"))
logger.info("done calculating dependencies")
}
def getMainDependencies(jobName: String, jobsMap: Map[String, Map[String, Any]]): List[String] = {
val job = jobsMap(jobName)
val dependencies = job("depends_on_jobs") match {
case l: List[_] => l.map(_.toString)
}
dependencies.flatMap { dep =>
jobsMap(dep)("main_job") match {
case true => List(dep)
case false => getMainDependencies(dep, jobsMap)
}
}.distinct
}
val numberRegex = """(.*)_(\d*)$""".r
def compressOnType(jobs: Map[String, List[String]]): Map[String, List[String]] = {
val set = for ((job, deps) <- jobs.toSet; dep <- deps) yield {
job match {
case numberRegex(name, number) => (name, dep match {
case numberRegex(name, number) => name
})
}
}
set.groupBy(_._1).map(x => x._1 -> x._2.map(_._2).toList)
}
def writeGraphvizFile(jobs: Map[String, List[String]], outputFile: File): Unit = {
val writer = new PrintWriter(outputFile)
writer.println("digraph graphname {")
jobs.foreach { case (a, b) => b.foreach(c => writer.println(s" $c -> $a;")) }
writer.println("}")
writer.close()
}
}
......@@ -22,6 +22,8 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/** Extension for md5sum */
class Md5sum(val root: Configurable) extends BiopetCommandLineFunction with Version {
mainFunction = false
@Input(doc = "Input")
var input: File = _
......
......@@ -49,15 +49,23 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config
//TODO: add more checksums types
override def freezeFieldValues(): Unit = {
for (q <- qscript.summaryQScripts) deps :+= q.summaryFile
init()
super.freezeFieldValues()
}
def init(): Unit = {
for (q <- qscript.summaryQScripts)
deps :+= q.summaryFile
for ((_, l) <- qscript.summarizables; s <- l) s match {
case f: QFunction => deps :+= f.firstOutput
case f: QFunction => try {
deps :+= f.firstOutput
} catch {
case e: NullPointerException => logger.warn("Queue values are not init")
}
case _ =>
}
jobOutputFile = new File(out.getParentFile, ".%s.%s.out".format(out.getName, analysisName))
super.freezeFieldValues()
}
/** Function to create summary */
......
......@@ -15,16 +15,14 @@
package nl.lumc.sasc.biopet.core
import java.io.File
import java.nio.file.Files
import com.google.common.io.Files
import nl.lumc.sasc.biopet.utils.ConfigUtils
import org.broadinstitute.gatk.queue.function.QFunction
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
import scala.io.Source
/**
* Created by pjvanthof on 09/05/16.
*/
......@@ -42,11 +40,14 @@ class WriteDependenciesTest extends TestNGSuite with Matchers {
@Test
def testDeps: Unit = {
val outputFile = File.createTempFile("deps.", ".json")
val tempDir = Files.createTempDir()
tempDir.deleteOnExit()
val prefix = "test"
val outputFile = new File(tempDir, s"$prefix.deps.json")
outputFile.deleteOnExit()
val func1 = Qfunc(file1 :: Nil, file2 :: Nil)
val func2 = Qfunc(file2 :: Nil, file3 :: Nil)
WriteDependencies.writeDependencies(func1 :: func2 :: Nil, outputFile)
WriteDependencies.writeDependencies(func1 :: func2 :: Nil, tempDir, prefix)
val deps = ConfigUtils.fileToConfigMap(outputFile)
deps("jobs") shouldBe a[Map[_, _]]
val jobs = deps("jobs").asInstanceOf[Map[String, Map[String, Any]]]
......@@ -66,7 +67,7 @@ class WriteDependenciesTest extends TestNGSuite with Matchers {
}
object WriteDependenciesTest {
val tempDir = Files.createTempDirectory("test").toFile
val tempDir = Files.createTempDir()
tempDir.deleteOnExit()
val file1 = new File(tempDir, "file1.txt")
val file2 = new File(tempDir, "file2.txt")
......
......@@ -30,6 +30,8 @@ import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
class SeqStat(val root: Configurable) extends ToolCommandFunction with Summarizable {
def toolObject = nl.lumc.sasc.biopet.tools.SeqStat
mainFunction = false
@Input(doc = "Input FASTQ", shortName = "input", required = true)
var input: File = null
......
......@@ -32,6 +32,8 @@ import scala.io.Source
class VcfStats(val root: Configurable) extends ToolCommandFunction with Summarizable with Reference {
def toolObject = nl.lumc.sasc.biopet.tools.VcfStats
mainFunction = false
@Input(doc = "Input fastq", shortName = "I", required = true)
var input: File = _
......
......@@ -117,6 +117,7 @@ object CarpTest {
copyFile("ref.fa.fai")
val executables = Map(
"skip_write_dependencies" -> true,
"reference_fasta" -> (outputDir + File.separator + "ref.fa"),
"fastqc" -> Map("exe" -> "test"),
"seqtk" -> Map("exe" -> "test"),
......
......@@ -115,6 +115,7 @@ object FlexiprepTest {
Files.touch(r2Zipped)
val executables = Map(
"skip_write_dependencies" -> true,
"seqstat" -> Map("exe" -> "test"),
"fastqc" -> Map("exe" -> "test"),
"seqtk" -> Map("exe" -> "test"),
......
......@@ -51,7 +51,8 @@ class GearsCentrifuge(val root: Configurable) extends QScript with SummaryQScrip
centrifugeKreport.centrifugeOutputFiles :+= fifo
centrifugeKreport.output = new File(outputDir, s"$outputName.$name.kreport")
centrifugeKreport.onlyUnique = unique
add(new BiopetFifoPipe(this, List(centrifugeKreport, Zcat(this, centrifugeOutput, fifo))))
val pipe = new BiopetFifoPipe(this, List(centrifugeKreport, Zcat(this, centrifugeOutput, fifo)))
add(pipe)
val krakenReportJSON = new KrakenReportToJson(this)
krakenReportJSON.inputReport = centrifugeKreport.output
......
......@@ -234,6 +234,7 @@ object TestGearsSingle {
bam.deleteOnExit()
val executables = Map(
"skip_write_dependencies" -> true,
"kraken" -> Map("exe" -> "test", "db" -> "test"),
"centrifuge" -> Map("exe" -> "test", "centrifuge_index" -> "test"),
"centrifugekreport" -> Map("exe" -> "test"),
......
......@@ -113,6 +113,7 @@ object GearsTest {
Files.touch(bam)
val config = Map(
"skip_write_dependencies" -> true,
"output_dir" -> outputDir,
"kraken" -> Map("exe" -> "test", "db" -> "test"),
"krakenreport" -> Map("exe" -> "test", "db" -> "test"),
......
......@@ -141,7 +141,9 @@ object DownloadGenomesTest {
val outputDir = Files.createTempDir()
outputDir.deleteOnExit()
val config = Map("output_dir" -> outputDir,
val config = Map(
"skip_write_dependencies" -> true,
"output_dir" -> outputDir,
"bwa" -> Map("exe" -> "test"),
"star" -> Map("exe" -> "test"),
"hisat2build" -> Map("exe" -> "test"),
......
......@@ -186,6 +186,7 @@ object GentrapTest {
copyFile("ref.fa.fai")
val executables: Map[String, Any] = Map(
"skip_write_dependencies" -> true,
"reference_fasta" -> (outputDir + File.separator + "ref.fa"),
"refFlat" -> (outputDir + File.separator + "ref.fa"),
"annotation_gtf" -> (outputDir + File.separator + "ref.fa"),
......
......@@ -75,6 +75,7 @@ object GwasTestTest {
}
val config = Map(
"skip_write_dependencies" -> true,
"reference_fasta" -> reference.toString,
"phenotype_file" -> phenotypeFile.toString,
"output_dir" -> outputDir,
......
......@@ -93,6 +93,7 @@ object Impute2VcfTest {
}
val config = Map(
"skip_write_dependencies" -> true,
"reference_fasta" -> reference.toString,
"phenotype_file" -> phenotypeFile.toString,
"output_dir" -> outputDir,
......
......@@ -112,6 +112,7 @@ object KopisuTest {
Files.touch(new File(controlDir, "test.txt"))
val config = Map(
"skip_write_dependencies" -> true,
"name_prefix" -> "test",
"output_dir" -> outputDir,
"reference_fasta" -> (outputDir + File.separator + "ref.fa"),
......
......@@ -128,6 +128,7 @@ abstract class AbstractTestMapping(val aligner: String) extends TestNGSuite with
}
val executables = Map(
"skip_write_dependencies" -> true,
"reference_fasta" -> (outputDir + File.separator + "ref.fa"),
"db" -> "test",
"bowtie_index" -> (outputDir + File.separator + "ref"),
......
......@@ -184,6 +184,7 @@ object MultisampleMappingTestTrait {
copyFile("empty.sam")
val config = Map(
"skip_write_dependencies" -> true,
"name_prefix" -> "test",
"cache" -> true,
"dir" -> "test",
......
......@@ -131,6 +131,7 @@ object SageTest {
copyFile("ref.fa.fai")
val config: Map[String, Any] = Map(
"skip_write_dependencies" -> true,
"reference_fasta" -> (outputDir + File.separator + "ref.fa"),
"output_dir" -> outputDir.getAbsolutePath,
"fastqc" -> Map("exe" -> "test"),
......
......@@ -104,14 +104,7 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript
def summarySettings = Map("sv_callers" -> configCallers.toList)
/** Files for the summary */
def summaryFiles: Map[String, File] = {
val callers: Set[String] = configCallers
//callersList.filter(x => callers.contains(x.name)).map(x => x.name -> x.outputFile).toMap + ("final" -> finalFile)
Map(
"final_mergedvcf" -> outputMergedVCF
)
}
def summaryFiles: Map[String, File] = Map("final_mergedvcf" -> outputMergedVCF)
}
object ShivaSvCalling extends PipelineCommand
\ No newline at end of file
......@@ -36,7 +36,8 @@ class Bcftools(val root: Configurable) extends Variantcaller {
bt.v = true
bt.c = true
add(mp | new FixMpileup(this) | bt > outputFile)
val pipe = mp | new FixMpileup(this) | bt > outputFile
add(pipe)
add(Tabix(this, outputFile))
}
}
......@@ -38,7 +38,8 @@ class BcftoolsSingleSample(val root: Configurable) extends Variantcaller {
bt.c = true
bt.output = new File(outputDir, sample + ".vcf.gz")
add(mp | new FixMpileup(this) | bt)
val pipe = mp | new FixMpileup(this) | bt
add(pipe)
add(Tabix(this, bt.output))
bt.output
}
......
......@@ -28,7 +28,8 @@ class Freebayes(val root: Configurable) extends Variantcaller {
val fb = new nl.lumc.sasc.biopet.extensions.Freebayes(this)
fb.bamfiles = inputBams.values.toList
fb.outputVcf = new File(outputDir, namePrefix + ".freebayes.vcf")
add(fb | new Bgzip(this) > outputFile)
val pipe = fb | new Bgzip(this) > outputFile
add(pipe)
add(Tabix.apply(this, outputFile))
}
......
......@@ -40,7 +40,8 @@ class RawVcf(val root: Configurable) extends Variantcaller {
val m2v = new MpileupToVcf(this)
m2v.inputBam = bamFile
m2v.output = new File(outputDir, sample + ".raw.vcf")
add(mp | m2v)
val pipe = mp | m2v
add(pipe)
val vcfFilter = new VcfFilter(this) {
override def configNamespace = "vcffilter"
......
......@@ -198,6 +198,7 @@ object ShivaSvCallingTest {
copyFile("ref.fa.fai")
val config = Map(
"skip_write_dependencies" -> true,
"name_prefix" -> "test",
"output_dir" -> outputDir,
"cache" -> true,
......
......@@ -191,6 +191,7 @@ object ShivaTest {
copyFile("ref.fa.fai")
val config = Map(
"skip_write_dependencies" -> true,
"name_prefix" -> "test",
"cache" -> true,
"dir" -> "test",
......
......@@ -230,6 +230,7 @@ object ShivaVariantcallingTest {
copyFile("ref.fa.fai")
val config = Map(
"skip_write_dependencies" -> true,
"name_prefix" -> "test",
"output_dir" -> outputDir,
"cache" -> true,
......
......@@ -119,6 +119,7 @@ object TinyCapTest {
Files.touch(annotationRefflat)
val config = Map(
"skip_write_dependencies" -> true,
"output_dir" -> outputDir,
"reference_fasta" -> (referenceFasta.getAbsolutePath),
"bowtie_index" -> (bowtieIndex.getAbsolutePath),
......
......@@ -107,6 +107,7 @@ object ToucanTest {
exacVcfFile.deleteOnExit()
val config = Map(
"skip_write_dependencies" -> true,
"reference_fasta" -> resourcePath("/fake_chrQ.fa"),
"output_dir" -> outputDir,
"gatk_jar" -> "test",
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment