From 2b54d173d8c0bd138dd57e308eed0cc8808b5cf7 Mon Sep 17 00:00:00 2001
From: Wai Yi Leung <w.y.leung@e-sensei.nl>
Date: Fri, 6 May 2016 08:19:36 +0200
Subject: [PATCH] Fix breakdancer tsv converter to vcf

---
 .../extensions/breakdancer/breakdancer2vcf.py    | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/public/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/breakdancer/breakdancer2vcf.py b/public/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/breakdancer/breakdancer2vcf.py
index 268e4df4d..0430667ca 100644
--- a/public/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/breakdancer/breakdancer2vcf.py
+++ b/public/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/breakdancer/breakdancer2vcf.py
@@ -30,19 +30,21 @@ import csv
 import datetime
 
 
-def main(tsvfile, vcffile):
+def main(tsvfile, vcffile, samplename):
     '''
     :param tsvfile: filename of input file.tsv
     :type tsvfile: string
     :param vcffile: filename of output file.vcf
     :type vcffile: string
+    :param samplename: sample name, appended as the final column of the VCF header line
+    :type samplename: string
     '''
     with open(tsvfile) as reader:
         # Parse file
         dictreader = _parse_tsvfile(reader)
 
         # Write out file
-        _format_vcffile(dictreader, vcffile)
+        _format_vcffile(dictreader, vcffile, samplename)
 
 def _parse_tsvfile(readable):
     '''
@@ -138,7 +140,7 @@ VCF_HEADER = """##fileformat=VCFv4.1
 ##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
 ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">""".format( filedate=TS_NOW.strftime( "%Y%m%d" ) )
 
-def _format_vcffile(dictreader, vcffile):
+def _format_vcffile(dictreader, vcffile, samplename):
     '''
     Create a pseudo .vcf file based on values read from DictReader instance.
     :param dictreader: DictReader instance to read data from
@@ -148,7 +150,7 @@ def _format_vcffile(dictreader, vcffile):
     '''
     FORMAT = "GT:DP"
     with open(vcffile, mode='w') as writer:
-        writer.write('{header}\n#{columns}\n'.format(header=VCF_HEADER, columns='\t'.join(_vcf_fields)))
+        writer.write('{header}\n#{columns}\n'.format(header=VCF_HEADER, columns='\t'.join(_vcf_fields + [samplename])))
         output_vcf = []
         for line in dictreader:
             CHROM = line['Chr1']
@@ -163,7 +165,7 @@ def _format_vcffile(dictreader, vcffile):
                 INFO += ';SVLEN={}'.format(int(line['Size']))
                 INFO += ";SVEND={}".format(SVEND)
                 INFO += ";END={}".format(SVEND)
-            
+
             # write alternate ALT field for Intrachromosomal translocations
             if line['Type'] in ['CTX']:
                 ALT = "N[{}:{}[".format(line['Chr2'], line['Pos2'])
@@ -187,6 +189,8 @@ if __name__ == '__main__':
             help='Breakdancer TSV outputfile')
     parser.add_argument('-o', '--outputvcf', dest='outputvcf', type=str,
             help='Output vcf to')
+    parser.add_argument('-s', '--sample', dest='sample', type=str,
+                        help='sample name')
 
     args = parser.parse_args()
-    main(args.breakdancertsv, args.outputvcf)
+    main(args.breakdancertsv, args.outputvcf, args.sample)
-- 
GitLab