Commit 0f4c6e15 authored by Sander Bollen
Browse files

Merge branch 'develop' into fix-gvcfs-extract

Conflicts:
	shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala
parents 2491e655 f963bb6e
......@@ -24,6 +24,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import scala.collection.mutable
import scala.io.Source
import scala.util.matching.Regex
/**
* Extension for cutadapt
......@@ -163,6 +164,51 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
(if (outputAsStsout) "" else required("--output", fastqOutput) +
" > " + required(statsOutput))
/**
 * Extracts the per-adapter clipping statistics from a cutadapt stats report.
 *
 * The report text is split on the `=== Adapter N ===` headers; only sections
 * containing the word "Sequence" are parsed. For each section the adapter
 * sequence and its "Trimmed: N times." count are read, together with every
 * "Overview of removed sequences" histogram found in that section.
 *
 * @param statsOutput the cutadapt stats file to parse
 * @return a map keyed by adapter sequence; each value is a map with
 *         "count" (Int, number of times trimmed) and "histogram"
 *         (Map of "5p"/"3p" to a Map of clipped length -> count)
 */
def extractClippedAdapters(statsOutput: File): Map[String, Any] = {
  val histoCountRow: Regex = """([\d]+)\t([\d]+)\t.*""".r
  // NOTE(review): inside a character class '|' is a literal, so this also accepts '|' in a sequence.
  val adapterR = """Sequence: ([C|T|A|G]+);.*Trimmed: ([\d]+) times\.""".r

  // Read the whole report up front and always release the file handle,
  // even when reading fails half-way.
  val statsFile = Source.fromFile(statsOutput)
  val report = try statsFile.mkString finally statsFile.close()

  val adapterRawStats: Array[String] = report
    .split("=== Adapter [\\d]+ ===")
    .filter(_.contains("Sequence"))

  adapterRawStats.map { adapterBlock =>
    // identify the adapter name and count; when several lines match, the last one wins,
    // and a section without a matching line yields an empty name and a zero count
    val (adapterName, adapterCount) = adapterBlock.split("\n")
      .collect { case adapterR(sequence, count) => (sequence, count.toInt) }
      .lastOption
      .getOrElse(("", 0))

    // parse the block that gives the histogram of clipped bases and from which end
    val counts = adapterBlock.split("Overview of removed sequences ")
      .filter(_.contains("length"))
      .map { clipSideRawStats =>
        val clipSideLabel = if (clipSideRawStats.contains("5'")) "5p" else "3p"
        val histogramValues = clipSideRawStats.split("\n").collect {
          case histoCountRow(length, count) => length.toInt -> count.toInt
        }
        clipSideLabel -> histogramValues.toMap
      }

    adapterName -> Map(
      "count" -> adapterCount,
      "histogram" -> counts.toMap
    )
  }.toMap
}
/** Output summary stats */
def summaryStats: Map[String, Any] = {
/**
......@@ -177,7 +223,6 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
val tooLongR = """.* that were too long: *([,\d]+) .*""".r
val tooManyN = """.* with too many N: *([,\d]+) .*""".r
val adapterR = """Sequence ([C|T|A|G]*);.*Trimmed: ([,\d]+) times.""".r
val basePairsProcessed = """Total basepairs processed: *([,\d]+) bp""".r
val basePairsWritten = """Total written \(filtered\): *([,\d]+) bp .*""".r
......@@ -192,24 +237,28 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
"bpoutput" -> 0,
"toomanyn" -> 0
)
val adapterStats: mutable.Map[String, Long] = mutable.Map()
// extract the adapters with its histogram
val adapterStats = if (statsOutput.exists) {
extractClippedAdapters(statsOutput)
} else Map.empty
if (statsOutput.exists) {
val statsFile = Source.fromFile(statsOutput)
for (line <- statsFile.getLines()) {
line match {
case processedReads(m) => stats("processed") = m.replaceAll(",", "").toLong
case withAdapters(m) => stats("withadapters") = m.replaceAll(",", "").toLong
case readsPassingFilters(m) => stats("passingfilters") = m.replaceAll(",", "").toLong
case tooShortR(m) => stats("tooshort") = m.replaceAll(",", "").toLong
case tooLongR(m) => stats("toolong") = m.replaceAll(",", "").toLong
case tooManyN(m) => stats("toomanyn") = m.replaceAll(",", "").toLong
case basePairsProcessed(m) => stats("bpinput") = m.replaceAll(",", "").toLong
case basePairsWritten(m) => stats("bpoutput") = m.replaceAll(",", "").toLong
case adapterR(adapter, count) => adapterStats += (adapter -> count.toLong)
case _ =>
case processedReads(m) => stats("processed") = m.replaceAll(",", "").toLong
case withAdapters(m) => stats("withadapters") = m.replaceAll(",", "").toLong
case readsPassingFilters(m) => stats("passingfilters") = m.replaceAll(",", "").toLong
case tooShortR(m) => stats("tooshort") = m.replaceAll(",", "").toLong
case tooLongR(m) => stats("toolong") = m.replaceAll(",", "").toLong
case tooManyN(m) => stats("toomanyn") = m.replaceAll(",", "").toLong
case basePairsProcessed(m) => stats("bpinput") = m.replaceAll(",", "").toLong
case basePairsWritten(m) => stats("bpoutput") = m.replaceAll(",", "").toLong
case _ =>
}
}
statsFile.close()
}
val cleanReads = stats("processed") - stats("withadapters")
......@@ -223,8 +272,8 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
"num_reads_discarded_too_long" -> stats("toolong"),
"num_reads_discarded_many_n" -> stats("toomanyn"),
"num_bases_input" -> stats("bpinput"),
"num_based_output" -> stats("bpoutput"),
adaptersStatsName -> adapterStats.toMap
"num_bases_output" -> stats("bpoutput"),
adaptersStatsName -> adapterStats
)
}
......
......@@ -156,9 +156,9 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
override def beforeGraph(): Unit = {
super.beforeGraph()
if (!cache && !database) {
Logging.addError("Must supply either cache or database for VariantEffectPredictor")
Logging.addError("Must either set 'cache' or 'database' to true for VariantEffectPredictor")
} else if (cache && dir.isEmpty) {
Logging.addError("Must supply dir to cache for VariantEffectPredictor")
Logging.addError("Must supply 'dir_cache' to cache for VariantEffectPredictor")
}
if (statsText) _summary = new File(output.getAbsolutePath + "_summary.txt")
}
......
package nl.lumc.sasc.biopet.extensions.gatk.broad
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ }
class AnalyzeCovariates(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
......
package nl.lumc.sasc.biopet.extensions.gatk.broad
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
import nl.lumc.sasc.biopet.utils.VcfUtils
import org.broadinstitute.gatk.utils.commandline.Argument
import org.broadinstitute.gatk.utils.commandline.Gather
import org.broadinstitute.gatk.utils.commandline.Input
import org.broadinstitute.gatk.utils.commandline.Output
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, Output }
class ApplyRecalibration(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
def analysis_type = "ApplyRecalibration"
......
package nl.lumc.sasc.biopet.extensions.gatk.broad
import org.broadinstitute.gatk.queue.function.scattergather.GatherFunction
package nl.lumc.sasc.biopet.extensions.gatk
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.extensions.picard.MergeSamFiles
import org.broadinstitute.gatk.queue.function.scattergather.GatherFunction
/**
* Merges BAM files using htsjdk.samtools.MergeSamFiles.
......
package nl.lumc.sasc.biopet.extensions.gatk.broad
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.{ TaggedFile }
import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
import nl.lumc.sasc.biopet.utils.VcfUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ }
//TODO: check gathering
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.{ Reference, BiopetJavaCommandLineFunction }
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, Output }
class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction with Reference {
class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction {
analysisName = "CatVariants"
javaMainClass = "org.broadinstitute.gatk.tools.CatVariants"
javaMainClass = classOf[org.broadinstitute.gatk.tools.CatVariants].getName
/** genome reference file <name>.fasta */
@Input(fullName = "reference", shortName = "R", doc = "genome reference file <name>.fasta", required = true, exclusiveOf = "", validation = "")
var reference: File = _
@Input(required = true)
var inputFiles: List[File] = Nil
/** Input VCF file/s */
@Input(fullName = "variant", shortName = "V", doc = "Input VCF file/s", required = true, exclusiveOf = "", validation = "")
var variant: Seq[File] = Nil
@Output(required = true)
var outputFile: File = null
/** output file */
@Output(fullName = "outputFile", shortName = "out", doc = "output file", required = true, exclusiveOf = "", validation = "")
@Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction])
var outputFile: File = _
@Input
var reference: File = null
/** assumeSorted should be true if the input files are already sorted (based on the position of the variants) */
@Argument(fullName = "assumeSorted", shortName = "assumeSorted", doc = "assumeSorted should be true if the input files are already sorted (based on the position of the variants)", required = false, exclusiveOf = "", validation = "")
var assumeSorted: Boolean = _
var assumeSorted = false
/** which type of IndexCreator to use for VCF/BCF indices */
@Argument(fullName = "variant_index_type", shortName = "", doc = "which type of IndexCreator to use for VCF/BCF indices", required = false, exclusiveOf = "", validation = "")
var variant_index_type: Option[String] = None
override def beforeGraph(): Unit = {
super.beforeGraph()
if (reference == null) reference = referenceFasta()
}
/** the parameter (bin width or features per bin) to pass to the VCF/BCF IndexCreator */
@Argument(fullName = "variant_index_parameter", shortName = "", doc = "the parameter (bin width or features per bin) to pass to the VCF/BCF IndexCreator", required = false, exclusiveOf = "", validation = "")
var variant_index_parameter: Option[Int] = None
/** Set the minimum level of logging */
@Argument(fullName = "logging_level", shortName = "l", doc = "Set the minimum level of logging", required = false, exclusiveOf = "", validation = "")
var logging_level: String = _
/** Set the logging location */
@Output(fullName = "log_to_file", shortName = "log", doc = "Set the logging location", required = false, exclusiveOf = "", validation = "")
@Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction])
var log_to_file: File = _
override def cmdLine = super.cmdLine +
repeat("-V", inputFiles) +
required("-out", outputFile) +
required("-R", reference) +
conditional(assumeSorted, "--assumeSorted")
required("-R", reference, spaceSeparated = true, escape = true, format = "%s") +
repeat("-V", variant, spaceSeparated = true, escape = true, format = "%s") +
required("-out", outputFile, spaceSeparated = true, escape = true, format = "%s") +
conditional(assumeSorted, "-assumeSorted", escape = true, format = "%s") +
optional("--variant_index_type", variant_index_type, spaceSeparated = true, escape = true, format = "%s") +
optional("--variant_index_parameter", variant_index_parameter, spaceSeparated = true, escape = true, format = "%s") +
optional("-l", logging_level, spaceSeparated = true, escape = true, format = "%s") +
optional("-log", log_to_file, spaceSeparated = true, escape = true, format = "%s")
}
object CatVariants {
  /**
   * Creates a CatVariants instance for the given root with its input and output files set.
   *
   * @param root   configurable passed to the CatVariants constructor
   * @param input  files assigned to `inputFiles`
   * @param output file assigned to `outputFile`
   * @return the configured CatVariants instance
   */
  def apply(root: Configurable, input: List[File], output: File): CatVariants = {
    val catVariants = new CatVariants(root)
    catVariants.inputFiles = input
    catVariants.outputFile = output
    catVariants
  }
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk.broad
package nl.lumc.sasc.biopet.extensions.gatk
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
......
package nl.lumc.sasc.biopet.extensions.gatk.broad
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
import nl.lumc.sasc.biopet.utils.VcfUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
import org.broadinstitute.gatk.utils.commandline.{ Gather, Input, Output, _ }
class CombineGVCFs(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
import nl.lumc.sasc.biopet.utils.VcfUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Gather, Input, Output }
/**
* Extension for CombineVariants from GATK
*
* Created by pjvan_thof on 2/26/15.
*
* @deprecated
*/
class CombineVariants(val root: Configurable) extends Gatk {
val analysisType = "CombineVariants"
@Input(doc = "", required = true)
var inputFiles: List[File] = Nil
@Output(doc = "", required = true)
var outputFile: File = null
var setKey: String = null
var rodPriorityList: String = null
var minimumN: Int = config("minimumN", default = 1)
var genotypeMergeOptions: Option[String] = config("genotypeMergeOptions")
var excludeNonVariants: Boolean = false
var inputMap: Map[File, String] = Map()
def addInput(file: File, name: String): Unit = {
inputFiles :+= file
inputMap += file -> name
}
import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ }
class CombineVariants(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
def analysis_type = "CombineVariants"
scatterClass = classOf[LocusScatterFunction]
setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false }
/** VCF files to merge together */
@Input(fullName = "variant", shortName = "V", doc = "VCF files to merge together", required = true, exclusiveOf = "", validation = "")
var variant: Seq[File] = Nil
/** File to which variants should be written */
@Output(fullName = "out", shortName = "o", doc = "File to which variants should be written", required = false, exclusiveOf = "", validation = "")
@Gather(classOf[CatVariantsGatherer])
var out: File = _
/** Determines how we should merge genotype records for samples shared across the ROD files */
@Argument(fullName = "genotypemergeoption", shortName = "genotypeMergeOptions", doc = "Determines how we should merge genotype records for samples shared across the ROD files", required = false, exclusiveOf = "", validation = "")
var genotypemergeoption: Option[String] = config("genotypemergeoption")
/** Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields */
@Argument(fullName = "filteredrecordsmergetype", shortName = "filteredRecordsMergeType", doc = "Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields", required = false, exclusiveOf = "", validation = "")
var filteredrecordsmergetype: Option[String] = config("filteredrecordsmergetype")
/** Determines how we should handle records seen at the same site in the VCF, but with different allele types (for example, SNP vs. indel) */
@Argument(fullName = "multipleallelesmergetype", shortName = "multipleAllelesMergeType", doc = "Determines how we should handle records seen at the same site in the VCF, but with different allele types (for example, SNP vs. indel)", required = false, exclusiveOf = "", validation = "")
var multipleallelesmergetype: Option[String] = config("multipleallelesmergetype")
/** Ordered list specifying priority for merging */
@Argument(fullName = "rod_priority_list", shortName = "priority", doc = "Ordered list specifying priority for merging", required = false, exclusiveOf = "", validation = "")
var rod_priority_list: Option[String] = config("rod_priority_list")
/** Emit interesting sites requiring complex compatibility merging to file */
@Argument(fullName = "printComplexMerges", shortName = "printComplexMerges", doc = "Emit interesting sites requiring complex compatibility merging to file", required = false, exclusiveOf = "", validation = "")
var printComplexMerges: Boolean = config("printComplexMerges", default = false)
/** Treat filtered variants as uncalled */
@Argument(fullName = "filteredAreUncalled", shortName = "filteredAreUncalled", doc = "Treat filtered variants as uncalled", required = false, exclusiveOf = "", validation = "")
var filteredAreUncalled: Boolean = config("filteredAreUncalled", default = false)
/** Emit a sites-only file */
@Argument(fullName = "minimalVCF", shortName = "minimalVCF", doc = "Emit a sites-only file", required = false, exclusiveOf = "", validation = "")
var minimalVCF: Boolean = config("minimalVCF", default = false)
/** Exclude sites where no variation is present after merging */
@Argument(fullName = "excludeNonVariants", shortName = "env", doc = "Exclude sites where no variation is present after merging", required = false, exclusiveOf = "", validation = "")
var excludeNonVariants: Boolean = config("excludeNonVariants", default = false)
/** Key name for the set attribute */
@Argument(fullName = "setKey", shortName = "setKey", doc = "Key name for the set attribute", required = false, exclusiveOf = "", validation = "")
var setKey: Option[String] = config("set_key")
/** Assume input VCFs have identical sample sets and disjoint calls */
@Argument(fullName = "assumeIdenticalSamples", shortName = "assumeIdenticalSamples", doc = "Assume input VCFs have identical sample sets and disjoint calls", required = false, exclusiveOf = "", validation = "")
var assumeIdenticalSamples: Boolean = config("assumeIdenticalSamples", default = false)
/** Minimum number of input files the site must be observed in to be included */
@Argument(fullName = "minimumN", shortName = "minN", doc = "Minimum number of input files the site must be observed in to be included", required = false, exclusiveOf = "", validation = "")
var minimumN: Option[Int] = config("minimumN")
/** Do not output the command line to the header */
@Argument(fullName = "suppressCommandLineHeader", shortName = "suppressCommandLineHeader", doc = "Do not output the command line to the header", required = false, exclusiveOf = "", validation = "")
var suppressCommandLineHeader: Boolean = config("suppressCommandLineHeader", default = false)
/** Use the INFO content of the record with the highest AC */
@Argument(fullName = "mergeInfoWithMaxAC", shortName = "mergeInfoWithMaxAC", doc = "Use the INFO content of the record with the highest AC", required = false, exclusiveOf = "", validation = "")
var mergeInfoWithMaxAC: Boolean = config("mergeInfoWithMaxAC", default = false)
/** Filter out reads with CIGAR containing the N operator, instead of failing with an error */
@Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "")
var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false)
/** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */
@Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "")
var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false)
/** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */
@Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "")
var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false)
@Output
@Gather(enabled = false)
private var outputIndex: File = _
override def beforeGraph(): Unit = {
override def beforeGraph() {
super.beforeGraph()
outputIndex = VcfUtils.getVcfIndexFile(outputFile)
genotypeMergeOptions match {
case Some("UNIQUIFY") | Some("PRIORITIZE") | Some("UNSORTED") | Some("REQUIRE_UNIQUE") | None =>
case _ => throw new IllegalArgumentException("Wrong option for genotypeMergeOptions")
}
deps :::= inputFiles.filter(_.getName.endsWith("vcf.gz")).map(x => new File(x.getAbsolutePath + ".tbi"))
deps = deps.distinct
deps ++= variant.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => VcfUtils.getVcfIndexFile(orig))
if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out))
outputIndex = VcfUtils.getVcfIndexFile(out)
}
override def cmdLine = super.cmdLine +
(for (file <- inputFiles) yield {
inputMap.get(file) match {
case Some(name) => required("-V:" + name, file)
case _ => required("-V", file)
}
}).mkString +
required("-o", outputFile) +
optional("--setKey", setKey) +
optional("--rod_priority_list", rodPriorityList) +
optional("-genotypeMergeOptions", genotypeMergeOptions) +
conditional(excludeNonVariants, "--excludeNonVariants")
repeat("-V", variant, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") +
optional("-o", out, spaceSeparated = true, escape = true, format = "%s") +
optional("-genotypeMergeOptions", genotypemergeoption, spaceSeparated = true, escape = true, format = "%s") +
optional("-filteredRecordsMergeType", filteredrecordsmergetype, spaceSeparated = true, escape = true, format = "%s") +
optional("-multipleAllelesMergeType", multipleallelesmergetype, spaceSeparated = true, escape = true, format = "%s") +
optional("-priority", rod_priority_list, spaceSeparated = true, escape = true, format = "%s") +
conditional(printComplexMerges, "-printComplexMerges", escape = true, format = "%s") +
conditional(filteredAreUncalled, "-filteredAreUncalled", escape = true, format = "%s") +
conditional(minimalVCF, "-minimalVCF", escape = true, format = "%s") +
conditional(excludeNonVariants, "-env", escape = true, format = "%s") +
optional("-setKey", setKey, spaceSeparated = true, escape = true, format = "%s") +
conditional(assumeIdenticalSamples, "-assumeIdenticalSamples", escape = true, format = "%s") +
optional("-minN", minimumN, spaceSeparated = true, escape = true, format = "%s") +
conditional(suppressCommandLineHeader, "-suppressCommandLineHeader", escape = true, format = "%s") +
conditional(mergeInfoWithMaxAC, "-mergeInfoWithMaxAC", escape = true, format = "%s") +
conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") +
conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") +
conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s")
}
object CombineVariants {
  /**
   * Creates a CombineVariants instance for the given root with its input variants and output set.
   *
   * @param root   configurable passed to the CombineVariants constructor
   * @param input  files assigned to `variant`
   * @param output file assigned to `out`
   * @return the configured CombineVariants instance
   */
  def apply(root: Configurable, input: List[File], output: File): CombineVariants = {
    val combineVariants = new CombineVariants(root)
    combineVariants.variant = input
    combineVariants.out = output
    combineVariants
  }
}
package nl.lumc.sasc.biopet.extensions.gatk.broad
package nl.lumc.sasc.biopet.extensions.gatk
import collection.JavaConversions._
import org.broadinstitute.gatk.utils.interval.IntervalUtils
import org.broadinstitute.gatk.queue.function.InProcessFunction
import org.broadinstitute.gatk.utils.interval.IntervalUtils
import scala.collection.JavaConversions._
/**
* Splits intervals by contig instead of evenly.
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL