Commit 9139289c authored by Peter van 't Hof's avatar Peter van 't Hof

Merge branch 'fix-svcallers-vcf' into 'develop'

Fix vcf files from SV calling output to contain samplename

The SV callers in our pipeline name the sample column 'default'; ideally each of these 'default' columns should be renamed to the name of the sample that was actually called. Otherwise we run into issues later on during SV merging.

fixes #321 

See merge request !365
parents 6ae5046c 5c9e81cd
......@@ -30,19 +30,21 @@ import csv
import datetime
def main(tsvfile, vcffile):
def main(tsvfile, vcffile, samplename):
'''
:param tsvfile: filename of input file.tsv
:type tsvfile: string
:param vcffile: filename of output file.vcf
:type vcffile: string
:param samplename: Name of the sample
:type samplename: string
'''
with open(tsvfile) as reader:
# Parse file
dictreader = _parse_tsvfile(reader)
# Write out file
_format_vcffile(dictreader, vcffile)
_format_vcffile(dictreader, vcffile, samplename)
def _parse_tsvfile(readable):
'''
......@@ -92,11 +94,11 @@ _tsv_fields = ('Chr1', 'Pos1', 'Orientation1',
_vcf_fields = ('CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT', 'default')
_vcf_fields = ['CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT']
TS_NOW = datetime.datetime.now()
VCF_HEADER = """##fileformat=VCFv4.1
VCF_HEADER = """##fileformat=VCFv4.2
##fileDate={filedate}
##source=breakdancer-max
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
......@@ -106,6 +108,7 @@ VCF_HEADER = """##fileformat=VCFv4.1
##INFO=<ID=NOVEL,Number=0,Type=Flag,Description="Indicates a novel structural variation">
##INFO=<ID=SVEND,Number=1,Type=Integer,Description="End position of the variant described in this record">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
##INFO=<ID=SVMETHOD,Number=0,Type=String,Description="Program called with">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
......@@ -138,7 +141,7 @@ VCF_HEADER = """##fileformat=VCFv4.1
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">""".format( filedate=TS_NOW.strftime( "%Y%m%d" ) )
def _format_vcffile(dictreader, vcffile):
def _format_vcffile(dictreader, vcffile, samplename):
'''
Create a pseudo .vcf file based on values read from DictReader instance.
:param dictreader: DictReader instance to read data from
......@@ -148,22 +151,22 @@ def _format_vcffile(dictreader, vcffile):
'''
FORMAT = "GT:DP"
with open(vcffile, mode='w') as writer:
writer.write('{header}\n#{columns}\n'.format(header=VCF_HEADER, columns='\t'.join(_vcf_fields)))
writer.write('{header}\n#{columns}\n'.format(header=VCF_HEADER, columns='\t'.join(_vcf_fields + [samplename])))
output_vcf = []
for line in dictreader:
CHROM = line['Chr1']
# TODO Figure out whether we have zero or one based positioning
POS = int(line['Pos1'])
ALT = '.'
ALT = '<{}>'.format(line['Type'])
SVEND = int(line['Pos2'])
INFO = 'PROGRAM=breakdancer;SVTYPE={}'.format(line['Type'])
INFO = 'SVMETHOD=breakdancer;SVTYPE={}'.format(line['Type'])
if line['Type'] not in ['CTX']:
INFO += ';SVLEN={}'.format(int(line['Size']))
INFO += ";SVEND={}".format(SVEND)
INFO += ";END={}".format(SVEND)
# write alternate ALT field for Intrachromosomal translocations
if line['Type'] in ['CTX']:
ALT = "N[{}:{}[".format(line['Chr2'], line['Pos2'])
......@@ -172,7 +175,7 @@ def _format_vcffile(dictreader, vcffile):
SAMPLEINFO = "{}:{}".format( '1/.', line['num_Reads'] )
# Create record
output_vcf.append([CHROM, POS, '.', '.', ALT, '.', 'PASS', INFO, FORMAT, SAMPLEINFO])
output_vcf.append([CHROM, POS, '.', 'N', ALT, '.', 'PASS', INFO, FORMAT, SAMPLEINFO])
# Sort all results
output_vcf.sort()
......@@ -184,9 +187,11 @@ def _format_vcffile(dictreader, vcffile):
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--breakdancertsv', dest='breakdancertsv', type=str,
help='Breakdancer TSV outputfile')
help='Breakdancer TSV outputfile')
parser.add_argument('-o', '--outputvcf', dest='outputvcf', type=str,
help='Output vcf to')
help='Output vcf to')
parser.add_argument('-s', '--sample', dest='sample', type=str,
help='sample name')
args = parser.parse_args()
main(args.breakdancertsv, args.outputvcf)
main(args.breakdancertsv, args.outputvcf, args.sample)
......@@ -64,7 +64,7 @@ class Pysvtools(val root: Configurable) extends BiopetCommandLineFunction {
/** return commandline to execute */
def cmdLine = required(executable) +
repeat("-c", input) +
repeat("-c", exclusionRegions) +
optional("-f", flanking) +
"-i " + repeat(input) +
"-o " + required(tsvoutput) +
......
......@@ -19,7 +19,7 @@ import java.io.File
import nl.lumc.sasc.biopet.core.extensions.PythonCommandLineFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import org.broadinstitute.gatk.utils.commandline._
class BreakdancerVCF(val root: Configurable) extends PythonCommandLineFunction {
setPythonScript("breakdancer2vcf.py")
......@@ -30,18 +30,23 @@ class BreakdancerVCF(val root: Configurable) extends PythonCommandLineFunction {
@Output(doc = "Output VCF to PATH")
var output: File = _
@Argument(doc = "Samplename")
var sample: String = _
def cmdLine = {
getPythonCommand +
"-i " + required(input) +
"-o " + required(output)
"-o " + required(output) +
"-s " + required(sample)
}
}
object BreakdancerVCF {
def apply(root: Configurable, input: File, output: File): BreakdancerVCF = {
def apply(root: Configurable, input: File, output: File, sample: String): BreakdancerVCF = {
val bd = new BreakdancerVCF(root)
bd.input = input
bd.output = output
bd.sample = sample
bd
}
}
\ No newline at end of file
......@@ -17,9 +17,9 @@ package nl.lumc.sasc.biopet.extensions.clever
import java.io.File
import nl.lumc.sasc.biopet.core.{ Version, Reference, BiopetCommandLineFunction }
import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Reference, Version }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
class CleverCaller(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version {
executable = config("exe", default = "clever")
......@@ -39,17 +39,17 @@ class CleverCaller(val root: Configurable) extends BiopetCommandLineFunction wit
@Input(doc = "Reference")
var reference: File = _
protected def workDir: File = new File(cwd, "work")
var cwd: File = _
protected def cleverOutputDir: File = new File(cleverWorkDir, "work")
var cleverWorkDir: File = _
@Output(doc = "Clever VCF output")
lazy val outputvcf: File = {
new File(cwd, "predictions.vcf")
new File(cleverOutputDir, "predictions.vcf")
}
@Output(doc = "Clever raw output")
lazy val outputraw: File = {
new File(workDir, "predictions.raw.txt")
new File(cleverOutputDir, "predictions.raw.txt")
}
// var T: Option[Int] = config("T", default = defaultThreads)
......@@ -61,7 +61,7 @@ class CleverCaller(val root: Configurable) extends BiopetCommandLineFunction wit
override def beforeGraph() {
super.beforeGraph()
if (workDir == null) throw new Exception("Clever :: Workdirectory is not defined")
if (cleverOutputDir == null) throw new Exception("Clever :: Workdirectory is not defined")
if (reference == null) reference = referenceFasta()
}
......@@ -75,14 +75,14 @@ class CleverCaller(val root: Configurable) extends BiopetCommandLineFunction wit
conditional(r, "-r") +
required(input) +
required(reference) +
required(workDir)
required(cleverOutputDir)
}
object CleverCaller {
def apply(root: Configurable, input: File, svDir: File): CleverCaller = {
val clever = new CleverCaller(root)
clever.input = input
clever.cwd = svDir
clever.cleverWorkDir = svDir
clever
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.clever
/**
* Created by wyleung on 4-4-16.
*/
import java.io.{ File, PrintWriter }
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.utils.ToolCommand
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
import scala.io.Source
/**
 * Queue wrapper that runs the [[CleverFixVCF]] tool as a separate Java process.
 *
 * The job reads `input`, writes `output` and passes `sampleName` to the tool
 * through its `-s` option.
 */
class CleverFixVCF(val root: Configurable) extends BiopetJavaCommandLineFunction {
  // Main class handed to the JVM; resolved from this wrapper class' own name.
  javaMainClass = getClass.getName

  @Input(doc = "Input Clever VCF")
  var input: File = _

  @Output(doc = "Output fixed VCF")
  var output: File = _

  @Argument(doc = "Samplename")
  var sampleName: String = _

  /** Java command line of the parent, followed by the `-i`, `-o` and `-s` options of the tool. */
  override def cmdLine = {
    val javaCommand = super.cmdLine
    val toolOptions = Seq(
      required("-i", input),
      required("-o", output),
      required("-s", sampleName)
    )
    javaCommand + toolOptions.mkString
  }
}
/**
 * Command line tool that rewrites a Clever VCF line by line:
 *
 *  - the column header line naming the sample `default` is replaced by [[extraHeader]]
 *    followed by a column header line naming the requested sample,
 *  - all other lines starting with `#` are copied unchanged,
 *  - every record line gets its REF column (4th column) set to `N`.
 */
object CleverFixVCF extends ToolCommand {
  /** Parsed command line arguments; the defaults are placeholders for options that were not parsed yet. */
  case class Args(inputVCF: File = null, sampleLabel: String = "",
                  outputVCF: File = null) extends AbstractArgs

  /**
   * Option parser of the tool. All three options are required: without `-o` the writer
   * would be opened on `null`, and without `-s` the new column header would end in a
   * tab with no sample column behind it.
   */
  class OptParser extends AbstractOptParser {
    opt[File]('i', "inputvcf") required () valueName "<vcffile/path>" action { (x, c) =>
      c.copy(inputVCF = x)
    } text "Please specify the input Clever VCF file"
    opt[String]('s', "samplelabel") required () valueName "<sample label>" action { (x, c) =>
      c.copy(sampleLabel = x)
    } text "Sample label to write in the sample column of the VCF column header"
    opt[File]('o', "outputvcf") required () valueName "<output>" action { (x, c) =>
      c.copy(outputVCF = x)
    } text "Path to write the fixed VCF to"
  }

  /**
   * Converts one line of the input VCF into the text to write to the output VCF.
   *
   * @param inHeaderLine line read from the input (header line or record), without line terminator
   * @param toCheckFor   column header line that has to be replaced
   * @param replacement  column header line to write instead of `toCheckFor`
   * @param extraHeader  meta-information lines to insert right before `replacement`
   * @return the text to write for this line, always terminated by a newline
   */
  def replaceHeaderLine(inHeaderLine: String, toCheckFor: String, replacement: String, extraHeader: String): String = {
    if (inHeaderLine == toCheckFor) {
      extraHeader + "\n" + replacement + "\n"
    } else if (inHeaderLine.startsWith("#")) {
      // Any other header line is passed through untouched
      inHeaderLine + "\n"
    } else {
      // This should be a record. The limit of -1 keeps trailing empty columns,
      // so the line is reproduced exactly apart from the REF column.
      val cols = inHeaderLine.split("\t", -1)
      if (cols.length > 3) {
        // The REF column is always overwritten with N
        cols(3) = "N"
        cols.mkString("\t") + "\n"
      } else {
        // Blank or truncated line: there is no REF column to fix, copy it as is
        inHeaderLine + "\n"
      }
    }
  }

  // Meta-information lines inserted before the column header line.
  // NOTE: ID=DP is declared twice in the INFO section ("Total Depth" and
  // "Read Depth of segment containing breakend"); both lines are kept because the
  // expected test resource (expectedresult.clever.vcf) contains exactly these lines.
  val extraHeader = """##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
##INFO=<ID=NOVEL,Number=0,Type=Flag,Description="Indicates a novel structural variation">
##INFO=<ID=SVEND,Number=1,Type=Integer,Description="End position of the variant described in this record">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">
##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints">
##INFO=<ID=BKPTID,Number=.,Type=String,Description="ID of the assembled alternate allele in the assembly file">
##INFO=<ID=MEINFO,Number=4,Type=String,Description="Mobile element info of the form NAME,START,END,POLARITY">
##INFO=<ID=METRANS,Number=4,Type=String,Description="Mobile element transduction info of the form CHR,START,END,POLARITY">
##INFO=<ID=DGVID,Number=1,Type=String,Description="ID of this element in Database of Genomic Variation">
##INFO=<ID=DBVARID,Number=1,Type=String,Description="ID of this element in DBVAR">
##INFO=<ID=DBRIPID,Number=1,Type=String,Description="ID of this element in DBRIP">
##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends">
##INFO=<ID=PARID,Number=1,Type=String,Description="ID of partner breakend">
##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend">
##INFO=<ID=BPWINDOW,Number=2,Type=Integer,Description="Window of breakpoints">
##INFO=<ID=CILEN,Number=2,Type=Integer,Description="Confidence interval around the inserted material between breakends">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Read Depth of segment containing breakend">
##INFO=<ID=DPADJ,Number=.,Type=Integer,Description="Read Depth of adjacency">
##INFO=<ID=CN,Number=1,Type=Integer,Description="Copy number of segment containing breakend">
##INFO=<ID=CNADJ,Number=.,Type=Integer,Description="Copy number of adjacency">
##INFO=<ID=CICN,Number=2,Type=Integer,Description="Confidence interval around copy number for the segment">
##INFO=<ID=CICNADJ,Number=.,Type=Integer,Description="Confidence interval around copy number for the adjacency">
##FORMAT=<ID=CN,Number=1,Type=Integer,Description="Copy number genotype for imprecise events">
##FORMAT=<ID=CNQ,Number=1,Type=Float,Description="Copy number genotype quality for imprecise events">
##FORMAT=<ID=CNL,Number=.,Type=Float,Description="Copy number genotype likelihood for imprecise events">
##FORMAT=<ID=NQ,Number=1,Type=Integer,Description="Phred style probability score that the variant is novel">
##FORMAT=<ID=HAP,Number=1,Type=Integer,Description="Unique haplotype identifier">
##FORMAT=<ID=AHAP,Number=1,Type=Integer,Description="Unique identifier of ancestral haplotype">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">"""

  /** Column header line that is looked for in the input: the sample column is called `default`. */
  val vcfColHeader = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tdefault"

  /** Column header line without sample column; the sample label is appended to it. */
  val vcfColReplacementHeader = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t"

  /**
   * Reads the input VCF, converts every line with [[replaceHeaderLine]] and writes the
   * result to the output VCF.
   *
   * @param args the command line arguments
   * @throws IllegalArgumentException when the command line arguments cannot be parsed
   */
  def main(args: Array[String]): Unit = {
    val argsParser = new OptParser
    val commandArgs: Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)

    val newColHeader = vcfColReplacementHeader + commandArgs.sampleLabel

    // Both handles are closed in finally blocks, so neither leaks when reading,
    // converting or writing fails half way.
    val inputVCF = Source.fromFile(commandArgs.inputVCF)
    try {
      val writer = new PrintWriter(commandArgs.outputVCF)
      try {
        inputVCF.getLines().foreach { line =>
          writer.write(replaceHeaderLine(line, vcfColHeader, newColHeader, extraHeader))
        }
      } finally {
        writer.close()
      }
    } finally {
      inputVCF.close()
    }
  }
}
......@@ -31,14 +31,14 @@ class PindelConfig(val root: Configurable) extends BiopetJavaCommandLineFunction
var output: File = _
@Argument(doc = "Insertsize")
var insertSize: Int = _
var insertSize: Int = 0
var sampleName: String = _
override def cmdLine = super.cmdLine +
required("-i", input) +
required("-n", sampleName) +
required("-s", insertSize) +
{ if (insertSize == 0) "" else s" -s $insertSize " } +
required("-o", output)
}
......
......@@ -81,7 +81,6 @@ class PindelVCF(val root: Configurable) extends BiopetCommandLineFunction with R
required("--reference", reference) +
required("--reference_name", referenceSpecies) +
required("--reference_date", rDate) +
required("--fake_biopet_input_holder", pindelOutputInputHolder) +
optional("--pindel_output", pindelOutput) +
optional("--pindel_output_root", pindelOutputRoot) +
required("--vcf", outputVCF) +
......
##fileformat=VCFv4.2
##fileDate=20150730
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
##INFO=<ID=NOVEL,Number=0,Type=Flag,Description="Indicates a novel structural variation">
##INFO=<ID=SVEND,Number=1,Type=Integer,Description="End position of the variant described in this record">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">
##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints">
##INFO=<ID=BKPTID,Number=.,Type=String,Description="ID of the assembled alternate allele in the assembly file">
##INFO=<ID=MEINFO,Number=4,Type=String,Description="Mobile element info of the form NAME,START,END,POLARITY">
##INFO=<ID=METRANS,Number=4,Type=String,Description="Mobile element transduction info of the form CHR,START,END,POLARITY">
##INFO=<ID=DGVID,Number=1,Type=String,Description="ID of this element in Database of Genomic Variation">
##INFO=<ID=DBVARID,Number=1,Type=String,Description="ID of this element in DBVAR">
##INFO=<ID=DBRIPID,Number=1,Type=String,Description="ID of this element in DBRIP">
##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends">
##INFO=<ID=PARID,Number=1,Type=String,Description="ID of partner breakend">
##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend">
##INFO=<ID=BPWINDOW,Number=2,Type=Integer,Description="Window of breakpoints">
##INFO=<ID=CILEN,Number=2,Type=Integer,Description="Confidence interval around the inserted material between breakends">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Read Depth of segment containing breakend">
##INFO=<ID=DPADJ,Number=.,Type=Integer,Description="Read Depth of adjacency">
##INFO=<ID=CN,Number=1,Type=Integer,Description="Copy number of segment containing breakend">
##INFO=<ID=CNADJ,Number=.,Type=Integer,Description="Copy number of adjacency">
##INFO=<ID=CICN,Number=2,Type=Integer,Description="Confidence interval around copy number for the segment">
##INFO=<ID=CICNADJ,Number=.,Type=Integer,Description="Confidence interval around copy number for the adjacency">
##FORMAT=<ID=CN,Number=1,Type=Integer,Description="Copy number genotype for imprecise events">
##FORMAT=<ID=CNQ,Number=1,Type=Float,Description="Copy number genotype quality for imprecise events">
##FORMAT=<ID=CNL,Number=.,Type=Float,Description="Copy number genotype likelihood for imprecise events">
##FORMAT=<ID=NQ,Number=1,Type=Integer,Description="Phred style probability score that the variant is novel">
##FORMAT=<ID=HAP,Number=1,Type=Integer,Description="Unique haplotype identifier">
##FORMAT=<ID=AHAP,Number=1,Type=Integer,Description="Unique identifier of ancestral haplotype">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT testsample
chrM 352 L170961 N <DEL> . PASS BPWINDOW=353,16175;CILEN=15817,15829;IMPRECISE;SVLEN=-15823;SVTYPE=DEL GT:DP 1/.:76
##fileformat=VCFv4.2
##fileDate=20150730
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT default
chrM 352 L170961 N <DEL> . PASS BPWINDOW=353,16175;CILEN=15817,15829;IMPRECISE;SVLEN=-15823;SVTYPE=DEL GT:DP 1/.:76
package nl.lumc.sasc.biopet.extensions.clever
import java.io.File
import java.nio.file.Paths
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
import scala.io.Source
/**
* Created by wyleung on 13-5-16.
*/
/**
 * Tests for the CleverFixVCF tool (line conversion and `main`) and for the command line
 * generated by the CleverFixVCF wrapper class.
 *
 * Every test method is explicitly typed as `Unit`: TestNG skips `@Test` methods that
 * return a value by default, so the result type must not depend on what the last
 * matcher expression happens to return.
 */
class CleverFixVCFTest extends TestNGSuite with Matchers {
  /** Returns the absolute path to test resource directory as a File object */
  private[clever] val resourceDir: File = new File(Paths.get(getClass.getResource(".").toURI).toString)

  /** Given a resource file name, returns the absolute path to it as a File object */
  private[clever] def resourceFile(p: String): File = new File(resourceDir, p)

  val rawCleverVCF = resourceFile("test.clever.vcf")
  val expectedCleverVCF = resourceFile("expectedresult.clever.vcf")

  /** The `default` column header line is replaced by the extra header plus the new column header. */
  @Test
  def replacementSucces: Unit = {
    CleverFixVCF.replaceHeaderLine(
      CleverFixVCF.vcfColHeader,
      CleverFixVCF.vcfColHeader,
      CleverFixVCF.vcfColReplacementHeader + "testsample",
      CleverFixVCF.extraHeader
    ) should equal(CleverFixVCF.extraHeader + "\n" + CleverFixVCF.vcfColReplacementHeader + "testsample" + "\n")
  }

  /** A record line keeps all its columns, except that REF is set to N. */
  @Test
  def replacementOther: Unit = {
    val vcfRecord = "chrM\t312\tL743020\t.\t<DEL>\t.\tPASS\tBPWINDOW=313,16189;CILEN=15866,15888;IMPRECISE;SVLEN=-15877;SVTYPE=DEL\tGT:DP\t1/.:103"
    val vcfRecordExpected = "chrM\t312\tL743020\tN\t<DEL>\t.\tPASS\tBPWINDOW=313,16189;CILEN=15866,15888;IMPRECISE;SVLEN=-15877;SVTYPE=DEL\tGT:DP\t1/.:103"
    CleverFixVCF.replaceHeaderLine(
      vcfRecord,
      CleverFixVCF.vcfColHeader,
      CleverFixVCF.vcfColReplacementHeader + "testsample",
      CleverFixVCF.extraHeader
    ) should equal(vcfRecordExpected + "\n")
  }

  /** Running the tool on the raw test VCF reproduces the expected VCF line for line. */
  @Test
  def mainTest: Unit = {
    val output = File.createTempFile("clever", ".test.vcf")
    output.deleteOnExit()

    CleverFixVCF.main(Array(
      "-i", rawCleverVCF.getAbsolutePath,
      "-o", output.getAbsolutePath,
      "-s", "testsample"
    ))

    val exp = Source.fromFile(expectedCleverVCF)
    try {
      val obs = Source.fromFile(output)
      try {
        // Compare the complete line lists: zipping the two iterators would stop at the
        // shorter one and let an empty or truncated output pass unnoticed.
        obs.getLines().toList shouldEqual exp.getLines().toList
      } finally {
        obs.close()
      }
    } finally {
      exp.close()
    }
  }

  /** The wrapper class puts the quoted -s, -i and -o options on its command line. */
  @Test
  def javaCommand: Unit = {
    val output = File.createTempFile("clever", ".test.vcf")
    output.deleteOnExit()
    val cfvcf = new CleverFixVCF(null)
    cfvcf.input = rawCleverVCF
    cfvcf.output = output
    cfvcf.sampleName = "testsample"

    cfvcf.cmdLine should include("'-s' 'testsample'")
    cfvcf.cmdLine should include(s"'-i' '${rawCleverVCF}'")
    cfvcf.cmdLine should include(s"'-o' '${output}'")
  }
}
......@@ -15,7 +15,8 @@
*/
package nl.lumc.sasc.biopet.pipelines.shiva.svcallers
import nl.lumc.sasc.biopet.extensions.breakdancer.{ BreakdancerVCF, BreakdancerCaller, BreakdancerConfig }
import nl.lumc.sasc.biopet.extensions.breakdancer.{ BreakdancerCaller, BreakdancerConfig, BreakdancerVCF }
import nl.lumc.sasc.biopet.extensions.picard.SortVcf
import nl.lumc.sasc.biopet.utils.config.Configurable
/** Script for sv caller Breakdancer */
......@@ -31,10 +32,16 @@ class Breakdancer(val root: Configurable) extends SvCaller {
val bdcfg = BreakdancerConfig(this, bamFile, new File(breakdancerSampleDir, sample + ".breakdancer.cfg"))
val breakdancer = BreakdancerCaller(this, bdcfg.output, new File(breakdancerSampleDir, sample + ".breakdancer.tsv"))
val bdvcf = BreakdancerVCF(this, breakdancer.output, new File(breakdancerSampleDir, sample + ".breakdancer.vcf"))
add(bdcfg, breakdancer, bdvcf)
val bdvcf = BreakdancerVCF(this, breakdancer.output, new File(breakdancerSampleDir, sample + ".breakdancer.vcf"),
sample = sample + sampleNameSuffix)
addVCF(sample, bdvcf.output)
val compressedVCF = new SortVcf(this)
compressedVCF.input = bdvcf.output
compressedVCF.output = new File(breakdancerSampleDir, s"${sample}.breakdancer.vcf.gz")
add(bdcfg, breakdancer, bdvcf, compressedVCF)
addVCF(sample, compressedVCF.output)
}
}
}
......@@ -15,7 +15,8 @@
*/
package nl.lumc.sasc.biopet.pipelines.shiva.svcallers
import nl.lumc.sasc.biopet.extensions.clever.CleverCaller
import nl.lumc.sasc.biopet.extensions.clever.{ CleverCaller, CleverFixVCF }
import nl.lumc.sasc.biopet.extensions.picard.SortVcf
import nl.lumc.sasc.biopet.utils.config.Configurable
/** Script for sv caller Clever */
......@@ -29,7 +30,19 @@ class Clever(val root: Configurable) extends SvCaller {
val clever = CleverCaller(this, bamFile, cleverDir)
add(clever)
addVCF(sample, clever.outputvcf)
val cleverVCF = new CleverFixVCF(this)
cleverVCF.input = clever.outputvcf
cleverVCF.output = new File(cleverDir, s".${sample}.clever.vcf")
cleverVCF.sampleName = sample + sampleNameSuffix
cleverVCF.isIntermediate = true
add(cleverVCF)
val compressedVCF = new SortVcf(this)
compressedVCF.input = cleverVCF.output
compressedVCF.output = new File(cleverDir, s"${sample}.clever.vcf.gz")
add(compressedVCF)
addVCF(sample, compressedVCF.output)
}
}
}
......@@ -17,6 +17,7 @@ package nl.lumc.sasc.biopet.pipelines.shiva.svcallers
import nl.lumc.sasc.biopet.extensions.delly.DellyCaller
import nl.lumc.sasc.biopet.extensions.gatk.CatVariants
import nl.lumc.sasc.biopet.extensions.picard.SortVcf
import nl.lumc.sasc.biopet.utils.config.Configurable
/** Script for sv caller delly */
......@@ -41,7 +42,13 @@ class Delly(val root: Configurable) extends SvCaller {
delly.analysistype = "DEL"
delly.outputvcf = new File(dellyDir, sample + ".delly.del.vcf")
add(delly)
catVariants.variant :+= delly.outputvcf
val compressedVCF = new SortVcf(this)
compressedVCF.input = delly.outputvcf
compressedVCF.output = new File(dellyDir, s"${sample}.delly.del.vcf.gz")
add(compressedVCF)
catVariants.variant :+= compressedVCF.output
}
if (dup) {
val delly = new DellyCaller(this)
......@@ -49,7 +56,13 @@ class Delly(val root: Configurable) extends SvCaller {
delly.analysistype = "DUP"
delly.outputvcf = new File(dellyDir, sample + ".delly.dup.vcf")
add(delly)
catVariants.variant :+= delly.outputvcf
val compressedVCF = new SortVcf(this)
compressedVCF.input = delly.outputvcf
compressedVCF.output = new File(dellyDir, s"${sample}.delly.dup.vcf.gz")
add(compressedVCF)
catVariants.variant :+= compressedVCF.output
}
if (inv) {
val delly = new DellyCaller(this)
......@@ -57,18 +70,30 @@ class Delly(val root: Configurable) extends SvCaller {
delly.analysistype = "INV"
delly.outputvcf = new File(dellyDir, sample + ".delly.inv.vcf")
add(delly)
catVariants.variant :+= delly.outputvcf
val compressedVCF = new SortVcf(this)
compressedVCF.input = delly.outputvcf
compressedVCF.output = new File(dellyDir, s"${sample}.delly.inv.vcf.gz")
add(compressedVCF)
catVariants.variant :+= compressedVCF.output
}
if (tra) {
val delly = new DellyCaller(this)
delly.input = bamFile
delly.analysistype = "TRA"
delly.outputvcf = new File(dellyDir, sample + ".delly.tra.vcf")
catVariants.variant :+= delly.outputvcf
add(delly)
val compressedVCF = new SortVcf(this)
compressedVCF.input = delly.outputvcf
compressedVCF.output = new File(dellyDir, s"${sample}.delly.tra.vcf.gz")
add(compressedVCF)
catVariants.variant :+= compressedVCF.output
}
require(catVariants.variant.nonEmpty, "Must atleast 1 SV-type be selected for Delly")
require(catVariants.variant.nonEmpty, "At least 1 SV-type should be selected for Delly")
add(catVariants)
addVCF(sample, catVariants.outputFile)
......
......@@ -18,6 +18,7 @@ package nl.lumc.sasc.biopet.pipelines.shiva.svcallers
import java.text.SimpleDateFormat
import java.util.Calendar
import nl.lumc.sasc.biopet.extensions.picard.SortVcf
import nl.lumc.sasc.biopet.extensions.pindel._
import nl.lumc.sasc.biopet.utils.config.Configurable
......@@ -39,7 +40,7 @@ class Pindel(val root: Configurable) extends SvCaller {
val configFile: File = new File(pindelDir, sample + ".pindel.cfg")
val cfg = new PindelConfig(this)
cfg.input = bamFile
cfg.sampleName = sample
cfg.sampleName = sample + sampleNameSuffix
cfg.output = configFile
add(cfg)
......@@ -58,7 +59,12 @@ class Pindel(val root: Configurable) extends SvCaller {
pindelVcf.outputVCF = new File(pindelDir, s"${sample}.pindel.vcf")
add(pindelVcf)
addVCF(sample, pindelVcf.outputVCF)
val compressedVCF = new SortVcf(this)
compressedVCF.input = pindelVcf.outputVCF
compressedVCF.output = new File(pindelDir, s"${sample}.pindel.vcf.gz")
add(compressedVCF)
addVCF(sample, compressedVCF.output)
}