Commit b049eb31 authored by Peter van 't Hof's avatar Peter van 't Hof Committed by GitHub
Browse files

Merge pull request #157 from biopet/fix-BIOPET-736-peter

Adding mutect2 pipeline
parents b0ef88db cc5db58a
......@@ -28,7 +28,7 @@ class Bgzip(val parent: Configurable) extends BiopetCommandLineFunction {
var input: List[File] = Nil
@Output(doc = "Compressed output file", required = false)
var output: File = null
var output: File = _
var f: Boolean = config("f", default = false)
executable = config("exe", default = "bgzip", freeVar = false)
......@@ -39,7 +39,7 @@ class Bgzip(val parent: Configurable) extends BiopetCommandLineFunction {
if (output == null && !outputAsStdout) Logging.addError("Output is missing for Bgzip")
}
def cmdLine =
def cmdLine: String =
required(executable) +
conditional(f, "-f") +
" -c " + repeat(input) +
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.bcftools
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Output, Input}
class BcftoolsReheader(val parent: Configurable) extends Bcftools {
@Input(doc = "Input vcf file", required = false)
var input: File = _
@Input(doc = "File specifying how sample names should be renamed", required = true)
var renameSamples: File = _
@Output(doc = "Output vcf file", required = false)
var output: File = _
def cmdLine: String =
required(executable) +
required("reheader") +
required("--samples", renameSamples) +
optional("--output", output) +
optional(input)
}
object BcftoolsReheader {
def apply(parent: Configurable, renameSamples: File): BcftoolsReheader = {
val reheader = new BcftoolsReheader(parent)
reheader.renameSamples = renameSamples
reheader
}
}
......@@ -418,3 +418,11 @@ trait CommandLineGATK extends BiopetJavaCommandLineFunction with Reference with
optional("-l", logging_level, spaceSeparated = true, escape = true, format = "%s") +
optional("-log", log_to_file, spaceSeparated = true, escape = true, format = "%s")
}
object CommandLineGATK {
def isFileWithTag(file: File, tag: String): Boolean = file match {
case f:TaggedFile => f.tag == tag
case _ => false
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.extensions.gatk.CommandLineGATK.isFileWithTag
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
import org.broadinstitute.gatk.utils.commandline.{Argument, Input, Output}
class ContEst(val parent: Configurable) extends CommandLineGATK {
def analysis_type: String = "ContEst"
/** Getter and setter for tumor sample bam file. */
def tumorSampleBam = input_file.find(file => isFileWithTag(file, "eval")).getOrElse(null)
def tumorSampleBam_= (value:File):Unit = {
input_file = input_file.filterNot(file => isFileWithTag(file, "eval"))
input_file :+= TaggedFile(value, "eval")
}
/** Getter and setter for normal sample bam file. */
def normalSampleBam = input_file.find(file => isFileWithTag(file, "genotype")).getOrElse(null)
def normalSampleBam_= (value:File):Unit = {
input_file = input_file.filterNot(file => isFileWithTag(file, "genotype"))
input_file :+= TaggedFile(value, "genotype")
}
/** Variant file containing information about the population allele frequencies. */
@Input(fullName = "popfile", shortName="pf", required = true)
var popFile: File = config("popfile")
/** Output file of the program. */
@Output(fullName = "out", shortName = "o", required = true)
var output: File = _
/** Where to write a full report about the loci we processed. */
@Output(fullName = "base_report", shortName = "br", required = false)
var baseReportFile: Option[File] = config("base_report")
@Argument(fullName = "beta_threshold", required = false)
var betaThreshold: Option[Double] = config("beta_threshold")
@Argument(fullName = "genotype_mode", shortName= "gm", required = false)
var genotypeMode: Option[String] = config("genotype_mode")
@Argument(fullName = "lane_level_contamination", shortName= "llc", required = false)
var laneLevelContamination: String = "SAMPLE"
@Output(fullName = "likelihood_file", shortName = "lf", required = false)
var likelihoodFile: Option[File] = config("likelihood_file")
/** Threshold for minimum mapping quality score.
* Default value: 20 */
@Argument(fullName = "min_mapq", required = false)
var minMapQ: Option[Int] = config("min_mapq")
/** Threshold for minimum base quality score.
* Default value: 20 */
@Argument(fullName = "min_qscore", required = false)
var minQScore: Option[Int] = config("min_qscore")
@Argument(fullName = "minimum_base_count", shortName = "mbc", required = false)
var minimumBaseCount: Option[Int] = config("minimum_base_count")
/** Evaluate contamination for just a single contamination population.
* Default value: CEU */
@Argument(fullName = "population", shortName = "population", required = false)
var population: Option[String] = config("population")
@Argument(fullName = "precision", shortName = "pc", required = false)
var precision: Option[Double] = config("precision")
@Argument(fullName = "trim_fraction", required = false)
var trimFraction: Option[Double] = config("trim_fraction")
override def cmdLine = super.cmdLine +
required("--popfile", popFile) +
required("--out", output) +
optional("--base_report", baseReportFile) +
optional("--beta_threshold", betaThreshold) +
optional("--genotype_mode", genotypeMode) +
optional("--lane_level_contamination", laneLevelContamination) +
optional("--likelihood_file", likelihoodFile) +
optional("--min_mapq", minMapQ) +
optional("--min_qscore", minQScore) +
optional("--minimum_base_count", minimumBaseCount) +
optional("--population", population) +
optional("--precision", precision) +
optional("--trim_fraction", trimFraction)
}
object ContEst {
def apply(parent: Configurable, tumorSampleBam: File, normalSampleBam: File, output: File): ContEst = {
val conest = new ContEst(parent)
conest.tumorSampleBam = tumorSampleBam
conest.normalSampleBam = normalSampleBam
conest.output = output
conest
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Argument, Gather, Input, Output}
class MuTect2(val parent: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
scatterClass = classOf[LocusScatterFunction]
setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false }
def analysis_type: String = "MuTect2"
/** vcf file with info from cosmic db TODO desc */
@Input(fullName = "cosmic", shortName = "cosmic", required = false)
var cosmic: Option[File] = config("cosmic")
/** Vcf file of the dbSNP database. When it's provided, then it's possible to use the param 'dbsnpNormalLod', see the
* description of that parameter for explanation. In addition, sIDs from this file are used to populate the ID column
* of the output.
* */
@Input(fullName = "dbsnp", shortName = "D", required = false)
var dbsnp: Option[File] = dbsnpVcfFile
@Input(fullName = "normal_panel", shortName = "PON", required = false)
var ponFile: Option[File] = config("normal_panel")
@Input(fullName = "contamination_fraction_per_sample_file", shortName = "contaminationFile", required = false)
var contaminationFile: Option[File] = config("contamination_file")
/** Output file of the program. */
@Output(fullName = "out", shortName = "o", required = false)
@Gather(classOf[CatVariantsGatherer])
var outputVcf: File = _
/**
* The very first threshold value that is used when assessing if a particular site is a variant in the tumor sample.
* Two probabilities are calculated: probability of the model that the site is a variant and the probability of the
* model that it's not. A site is classified as a candidate variant if the logarithm of the ratio between these 2
* probabilities is higher than this threshold. Candidate variants are subject to further filtering but this value
* decides if the site enters this processing, it is used for the initial selection. Raising the value increases
* specificity and lowers sensitivity.
*
* Default value: 6.3
* */
@Argument(fullName = "tumor_lod", required = false)
var tumorLOD: Option[Double] = None
/** TODO: unclear how it differs from the param above 'tumorLod'.
* Default value: 4.0
* */
@Argument(fullName = "initial_tumor_lod", required = false)
var initialTumorLOD: Option[Double] = None
/**
* A threshold used when deciding if a variant found in tumor is reference in the normal sample (the parameter used when
* finding variants in tumor is described above - 'tumorLod'). If it is reference in the normal, then the variant is
* classified as a somatic variant existing only in tumor. The threshold is applied on the logarithm of the ratio
* between the probabilities of the model that normal doesn't have a variant at the site and the probability
* of the model that it has a variant.
*
* Default value: 2.2
* */
@Argument(fullName = "normal_lod", required = false)
var normalLOD: Option[Double] = None
/** TODO: unclear how it differs from the param above 'normalLod'.
* Default value: 0.5
* */
@Argument(fullName = "initial_normal_lod", required = false)
var initialNormalLOD: Option[Double] = None
/**
* Modelling takes into account also the probability for a site to have a variant in the general population. For sites
* in the dbSNP database the probability is higher, so the threshold applied when deciding that a variant found in tumor
* is missing in the normal should also be higher. If a site is in the dbSNP database then this threshold is used, if it's
* not then the parameter 'normalLod' is used.
*
* Default value: 5.5
* */
@Argument(fullName = "dbsnp_normal_lod", required = false, otherArgumentRequired = "dbsnp")
var dbsnpNormalLod: Option[Double] = None
/** If this fraction is greater than zero, the caller will aggressively attempt to remove contamination through
* biased down-sampling of reads (for all samples). It will ignore the contamination fraction of reads for each
* alternate allele. So if the pileup contains N total bases, then we will try to remove (N * contamination fraction)
* bases for each alternate allele.
* Default value: 0
* */
@Argument(fullName = "contamination_fraction_to_filter", shortName="contamination", required = false)
var contaminationFractionToFilter: Option[Double] = config("contamination_fraction_to_filter")
/** One or more classes/groups of annotations to apply to variant calls */
@Argument(fullName = "group", shortName = "G", doc = "One or more classes/groups of annotations to apply to variant calls", required = false)
var group: List[String] = config("group", default = Nil)
/** Heterozygosity value used to compute prior likelihoods for any locus. */
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
var heterozygosity: Option[Double] = config("heterozygosity")
/** Heterozygosity for indel calling. */
@Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false)
var heterozygosityForIndels: Option[Double] = config("indel_heterozygosity")
/** Standard deviation of heterozygosity. */
@Argument(fullName = "heterozygosity_stdev", shortName = "heterozygosityStandardDeviation", doc = "Standard deviation of heterozygosity", required = false)
var heterozygositySD: Option[Double] = config("heterozygosity_stdev")
/** Value used for filtering tumor variants. If a position that has been found to have a variant in tumor, has reads with the variant alternate allele also in the normal sample and if the number of such reads is higher than the value given with this parameter and the sum of the quality scores in these reads in this position is higher than given with the parameter 'maxAltAllelesInNormalQScoreSum', then the variant is filtered out from the final result.
* Default value: 1 */
@Argument(fullName = "max_alt_alleles_in_normal_count", required = false)
var maxAltAllelesInNormalCount: Option[Int] = config("max_alt_alleles_in_normal_count")
/** Value used for filtering tumor variants. If a position that has been found to have a variant in tumor, has reads with the variant alternate allele also in the normal sample and if the fraction of such reads from all reads is higher than the value given with this parameter and the sum of the quality scores in these reads in this position is higher than given with the parameter 'maxAltAllelesInNormalQScoreSum', then the variant is filtered out from the final result.
* Default value: 0.03 */
@Argument(fullName = "max_alt_alleles_in_normal_fraction", required = false)
var maxAltAllelesInNormalFraction: Option[Double] = config("max_alt_alleles_in_normal_fraction")
/** For explanation see the description of the parameters above.
* Default value: 20 */
@Argument(fullName = "max_alt_alleles_in_qscore_sum", required = false)
var maxAltAllelesInNormalQScoreSum: Option[Int] = config("max_alt_alleles_in_qscore_sum")
/** Maximum reads in an active region. When downsampling, level the coverage of the reads in each sample to no more
* than maxReadsInRegionPerSample reads, not reducing coverage at any read start to less than minReadsPerAlignmentStart */
@Argument(fullName = "maxReadsInRegionPerSample", shortName = "maxReadsInRegionPerSample", required = false)
var maxReadsInRegionPerSample: Option[Int] = config("maxReadsInRegionPerSample")
/** Minimum base quality required to consider a base for calling.
* Default value: 10 */
@Argument(fullName = "min_base_quality_score", shortName = "mbq", required = false)
var minBaseQScore: Option[Int] = config("min_base_quality_score")
/** Minimum number of reads sharing the same alignment start for each genomic location in an active region */
@Argument(fullName = "minReadsPerAlignmentStart", shortName = "minReadsPerAlignStart", required = false)
var minReadsPerAlignmentStart: Option[Int] = config("minReadsPerAlignmentStart")
/** Value used for filtering tumor variants to exclude false positives caused by misalignments evidenced by alternate alleles occurring near the start and end of reads. Variants are rejected if their median distance from the start/end of of the reads is lower than this parameter and if the median absolute deviation is lower than the value given with the parameter 'pir_mad_threshold'. Filtering is done only if the parameter 'enableClusteredReadPositionFilter' is set to true.
* Default value: 10 */
@Argument(fullName = "pir_median_threshold", required = false)
var pirMedianThreshold: Option[Double] = config("pir_median_threshold")
/** Value used for filtering tumor variants to exclude false positives caused by misalignments evidenced by alternate alleles occurring near the start and end of reads. Variants are rejected if their median distance from the start/end of the reads is lower than given with the parameter 'pirMedianThreshold' and if the median absolute deviation is lower than given with this parameter. Filtering is done only if the parameter 'enableClusteredReadPositionFilter' is set to true.
* Default value: 3 */
@Argument(fullName = "pir_mad_threshold", required = false)
var pirMadThreshold: Option[Double] = config("pir_mad_threshold")
/** TODO */
@Argument(fullName = "power_constant_qscore", required = false)
var powerConstantQScore: Option[Int] = config("power_constant_qscore")
/** Ploidy per sample. For pooled data, this should be set to (Number of samples in each pool x Sample Ploidy).
* Default value: 2
* */
@Argument(fullName = "sample_ploidy", shortName="ploidy", required = false)
var ploidy: Option[Int] = config("sample_ploidy")
/** The minimum phred-scaled confidence threshold at which variants should be called. */
@Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", required = false)
var standardCallConf: Option[Double] = config("stand_call_conf")
// flags:
/** TODO */
@Argument(fullName = "annotateNDA", shortName = "nda", required = false)
var annotateNDA: Boolean = config("annotate_nda", default = false)
/** If this parameter is set to true, then tumor variants are filtered to exclude false positives that are caused by misalignments evidenced by alternate alleles occurring near the start and end of reads. For filtering values of the parameters 'pirMedianThreshold' and 'pirMadThreshold' are used.
* Default value: false */
@Argument(fullName = "enable_clustered_read_position_filter", required = false)
var enableClusteredReadPositionFilter: Boolean = config("enable_clustered_read_position_filter", default = false)
/** TODO */
@Argument(fullName = "enable_strand_artifact_filter", required = false)
var enableStrandArtifactFilter: Boolean = config("enable_strand_artifact_filter", default = false)
/** TODO */
@Argument(fullName = "useNewAFCalculator", shortName = "newQual", required = false)
var useNewAFCalculator: Boolean = config("use_new_af_calculator", default = false)
override def cmdLine: String = super.cmdLine +
optional("--cosmic", cosmic) +
optional("--dbsnp", dbsnp) +
optional("--normal_panel", ponFile) +
optional("--contamination_fraction_per_sample_file", contaminationFile) +
optional("--contamination_fraction_to_filter", contaminationFractionToFilter) +
optional("--dbsnp_normal_lod", dbsnpNormalLod) +
repeat("--group", group) +
optional("--heterozygosity", heterozygosity) +
optional("--heterozygosity_stdev", heterozygositySD) +
optional("--indel_heterozygosity", heterozygosityForIndels) +
optional("--initial_normal_lod", initialNormalLOD) +
optional("--initial_tumor_lod", initialTumorLOD) +
optional("--max_alt_alleles_in_normal_count", maxAltAllelesInNormalCount) +
optional("--max_alt_alleles_in_normal_fraction", maxAltAllelesInNormalFraction) +
optional("--max_alt_alleles_in_qscore_sum", maxAltAllelesInNormalQScoreSum) +
optional("--maxReadsInRegionPerSample", maxReadsInRegionPerSample) +
optional("--min_base_quality_score", minBaseQScore) +
optional("--normal_lod", normalLOD) +
optional("--pir_mad_threshold", pirMadThreshold) +
optional("--pir_median_threshold", pirMedianThreshold) +
optional("--power_constant_qscore", powerConstantQScore) +
optional("--sample_ploidy", ploidy) +
optional("-stand_call_conf", standardCallConf) +
optional("--tumor_lod", tumorLOD) +
conditional(annotateNDA, "--annotateNDA") +
conditional(enableClusteredReadPositionFilter, "--enable_clustered_read_position_filter") +
conditional(enableStrandArtifactFilter, "--enable_strand_artifact_filter") +
conditional(useNewAFCalculator, "--useNewAFCalculator") +
(if (outputAsStdout) "" else required("--out", outputVcf))
}
......@@ -416,8 +416,9 @@ object ConfigUtils extends Logging {
Logging.addError(
"Value does not exist but is required, key: " + value.requestIndex.key +
" namespace: " + value.requestIndex.module,
if (value.requestIndex.path != Nil) " path: " + value.requestIndex.path.mkString("->")
else null
if (value.requestIndex.path != Nil)
Some(" path: " + value.requestIndex.path.mkString("->"))
else None
)
exist
}
......
......@@ -93,4 +93,10 @@ object IoUtils {
case e: IOException => false
}
}
def writeLinesToFile(output: File, lines: List[String]): Unit = {
val writer = new PrintWriter(output)
lines.foreach(writer.println(_))
writer.close()
}
}
......@@ -38,10 +38,19 @@ object Logging {
private[biopet] val errors: ListBuffer[Exception] = ListBuffer()
def addError(error: String, debug: String = null): Unit = {
val msg = error + (if (debug != null && logger.isDebugEnabled) "; " + debug else "")
logger.error(msg)
errors.append(new Exception(msg))
def addError(error: String,
debug: Option[String] = None,
cause: Option[Exception] = None): Unit = {
val msg = error + debug.map("; " + _).getOrElse("")
cause match {
case Some(e) =>
logger.error(msg, e)
errors.append(new Exception(msg, e))
case _ =>
logger.error(msg)
errors.append(new Exception(msg))
}
}
def checkErrors(debug: Boolean = false): Unit = {
......
......@@ -24,7 +24,9 @@ import nl.lumc.sasc.biopet.extensions.tools.ValidateVcf
import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions
import nl.lumc.sasc.biopet.pipelines.kopisu.Kopisu
import nl.lumc.sasc.biopet.pipelines.mapping.{Mapping, MultisampleMappingTrait}
import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.somatic.TumorNormalPair
import nl.lumc.sasc.biopet.pipelines.toucan.Toucan
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.queue.function.QFunction
......@@ -59,7 +61,24 @@ class Shiva(val parent: Configurable)
override def namePrefix = "multisample"
override def configNamespace: String = "shivavariantcalling"
override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
genders = samples.map { case (sampleName, sample) => sampleName -> sample.gender }
genders = samples.map { case (sampleName, s) => sampleName -> s.gender }
//TODO: this needs changed when the sample/library refactoring is beeing done
tumorSamples = samples
.filter(_._2.sampleTags.get("type").contains("tumor"))
.flatMap {
case (tumorName, tumorSample) =>
tumorSample.sampleTags.get("control") match {
case Some(normal: String) =>
if (!samples.contains(normal))
Logging.addError(s"Normal sample '$normal' does not exist")
Some(TumorNormalPair(tumorName, normal))
case _ =>
Logging.addError(s"Control is missing for tumor sample '$tumorName'")
None
}
}
.toList
} else
new ShivaVariantcalling(qscript) {
override def configNamespace = "shivavariantcalling"
......
......@@ -22,6 +22,11 @@ import nl.lumc.sasc.biopet.extensions.gatk.{CombineVariants, GenotypeConcordance
import nl.lumc.sasc.biopet.extensions.tools.VcfStats
import nl.lumc.sasc.biopet.extensions.vt.{VtDecompose, VtNormalize}
import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions
import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.somatic.{
MuTect2,
SomaticVariantCaller,
TumorNormalPair
}
import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.{VarscanCnsSingleSample, _}
import nl.lumc.sasc.biopet.utils.{BamUtils, Logging}
import nl.lumc.sasc.biopet.utils.config.Configurable
......@@ -51,20 +56,45 @@ class ShivaVariantcalling(val parent: Configurable)
var genders: Map[String, Gender.Value] = _
var tumorSamples: List[TumorNormalPair] = _
def isGermlineVariantCallingConfigured(): Boolean = {
callers.exists(!_.isInstanceOf[SomaticVariantCaller])
}
def isSomaticVariantCallingConfigured(): Boolean = {
callers.exists(_.isInstanceOf[SomaticVariantCaller])
}
/** Executed before script */
def init(): Unit = {
if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg)
//TODO: this needs changed when the sample/library refactoring is beeing done
if (Option(genders).isEmpty) genders = {
val samples: Map[String, Any] = config("genders", default = Map())
samples.map {
case (sampleName, gender) =>
sampleName -> (gender.toString.toLowerCase match {
case "male" => Gender.Male
case "female" => Gender.Female
case _ => Gender.Unknown
})
}
inputBams.keys.map { sampleName =>
val gender: Option[String] =
config("gender", path = "samples" :: sampleName :: "tags" :: Nil)
sampleName -> (gender match {
case Some("male") => Gender.Male
case Some("female") => Gender.Female
case _ => Gender.Unknown
})
}.toMap
}
//TODO: this needs changed when the sample/library refactoring is beeing done
if (Option(tumorSamples).isEmpty)
tumorSamples = inputBams.keys
.filter(name =>
config("type", path = "samples" :: name :: "tags" :: Nil, default = "normal").asString.toLowerCase == "tumor")
.map { tumorSample =>
val normal: String = config("normal", path = "samples" :: tumorSample :: "tags" :: Nil)
if (!inputBams.keySet.contains(normal))
Logging.addError(s"Normal sample '$normal' does not exist")
TumorNormalPair(tumorSample, normal)
}
.toList
}
var referenceVcf: Option[File] = config("reference_vcf")
......@@ -108,7 +138,10 @@ class ShivaVariantcalling(val parent: Configurable)
.callersList(this)
.map(_.name)
.mkString(", "))
if (!callers.exists(_.mergeVcfResults))
if (!isGermlineVariantCallingConfigured())
Logging.addError(
"For running the pipeline at least one germline variant caller has to be configured")
else if (!callers.exists(_.mergeVcfResults))
Logging.addError("must select at least 1 variantcaller where merge_vcf_re