Commit cb7a051c authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge branch 'release-0.7.0' into 'master'

Release 0.7.0



See merge request !449
parents 23c1ccea bf548a42
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
/**
* Created by wyleung on 11-2-16.
*/
......@@ -35,7 +49,7 @@ class TinyCapTest extends TestNGSuite with Matchers {
def initPipeline(map: Map[String, Any]): TinyCap = {
new TinyCap() {
override def configName = "tinycap"
override def configNamespace = "tinycap"
override def globalConfig = new Config(map)
......@@ -120,7 +134,7 @@ object TinyCapTest {
"cutadapt" -> Map("exe" -> "test"),
"bowtie" -> Map("exe" -> "test"),
"htseqcount" -> Map("exe" -> "test"),
"igvtools" -> Map("exe" -> "test"),
"igvtools" -> Map("exe" -> "test", "igvtools_jar" -> "test"),
"wigtobigwig" -> Map("exe" -> "test")
)
......
<!--
Biopet is built on top of GATK Queue for building bioinformatic
pipelines. It is mainly intended to support LUMC SHARK cluster which is running
SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
should also be able to execute Biopet tools and pipelines.
Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
Contact us at: sasc@lumc.nl
A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
license; For commercial users or users who do not want to follow the AGPL
license, please contact us to obtain a separate license.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>Toucan</artifactId>
<packaging>jar</packaging>
<parent>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Biopet</artifactId>
<version>0.7.0</version>
<relativePath>../</relativePath>
</parent>
<inceptionYear>2014</inceptionYear>
<name>Toucan</name>
<dependencies>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>BiopetCore</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>BiopetToolsExtensions</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.8</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_2.10</artifactId>
<version>2.2.1</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>
......@@ -8,8 +8,7 @@
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
......
......@@ -8,8 +8,7 @@
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
......
......@@ -8,23 +8,28 @@
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.toucan
import java.io.File
import htsjdk.samtools.reference.FastaSequenceFile
import nl.lumc.sasc.biopet.core._
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.extensions.bcftools.BcftoolsView
import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsIntersect, BedtoolsMerge }
import nl.lumc.sasc.biopet.extensions.gatk.{ CatVariants, SelectVariants }
import nl.lumc.sasc.biopet.extensions.manwe.{ ManweAnnotateVcf, ManweSamplesImport }
import nl.lumc.sasc.biopet.extensions.tools.{ GvcfToBed, VcfWithVcf, VepNormalizer }
import nl.lumc.sasc.biopet.extensions.{ Bgzip, Ln, VariantEffectPredictor }
import nl.lumc.sasc.biopet.utils.VcfUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.intervals.BedRecordList
import org.broadinstitute.gatk.queue.QScript
import scalaz._, Scalaz._
/**
* Pipeline to annotate a vcf file with VEP
......@@ -35,20 +40,43 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
def this() = this(null)
@Input(doc = "Input VCF file", shortName = "Input", required = true)
var inputVCF: File = _
var inputVcf: File = _
@Input(doc = "Input GVCF file", shortName = "gvcf", required = false)
var inputGvcf: Option[File] = None
var sampleIds: List[String] = Nil
def init(): Unit = {
inputFiles :+= new InputFile(inputVCF)
sampleIds = root match {
case m: MultiSampleQScript => m.samples.keys.toList
case null => VcfUtils.getSampleIds(inputVCF)
case s: SampleLibraryTag => s.sampleId.toList
case _ => throw new IllegalArgumentException("You don't have any samples")
def outputName = inputVcf.getName.stripSuffix(".vcf.gz")
def outputVcf: File = (gonlVcfFile, exacVcfFile) match {
case (Some(_), Some(_)) => new File(outputDir, s"$outputName.vep.normalized.gonl.exac.vcf.gz")
case (Some(_), _) => new File(outputDir, s"$outputName.vep.normalized.gonl.vcf.gz")
case (_, Some(_)) => new File(outputDir, s"$outputName.vep.normalized.exac.vcf.gz")
case _ => new File(outputDir, s"$outputName.vep.normalized.vcf.gz")
}
lazy val minScatterGenomeSize: Long = config("min_scatter_genome_size", default = 75000000)
lazy val enableScatter: Boolean = config("enable_scatter", default = {
val ref = new FastaSequenceFile(referenceFasta(), true)
val refLenght = ref.getSequenceDictionary.getReferenceLength
ref.close()
refLenght > minScatterGenomeSize
})
def sampleInfo: Map[String, Map[String, Any]] = root match {
case m: MultiSampleQScript => m.samples.map { case (sampleId, sample) => sampleId -> sample.sampleTags }
case null => VcfUtils.getSampleIds(inputVcf).map(x => x -> Map[String, Any]()).toMap
case s: SampleLibraryTag => s.sampleId.map(x => x -> Map[String, Any]()).toMap
case _ => throw new IllegalArgumentException("")
}
lazy val gonlVcfFile: Option[File] = config("gonl_vcf")
lazy val exacVcfFile: Option[File] = config("exac_vcf")
lazy val doVarda: Boolean = config("use_varda", default = false)
def init(): Unit = {
require(inputVcf != null, "No Input vcf given")
inputFiles :+= new InputFile(inputVcf)
}
override def defaults = Map(
......@@ -56,29 +84,57 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
)
def biopetScript(): Unit = {
val doVarda: Boolean = config("use_varda", default = false)
val useVcf: File = if (doVarda) {
inputGvcf match {
case Some(s) => varda(inputVCF, s)
case Some(s) => varda(inputVcf, s)
case _ => throw new IllegalArgumentException("You have not specified a GVCF file")
}
} else inputVCF
} else inputVcf
if (enableScatter) {
val outputVcfFiles = BedRecordList.fromReference(referenceFasta())
.scatter(config("bin_size", default = 50000000))
.allRecords.map { region =>
val chunkName = s"${region.chr}-${region.start}-${region.end}"
val chunkDir = new File(outputDir, "chunk" + File.separator + chunkName)
chunkDir.mkdirs()
val bedFile = new File(chunkDir, chunkName + ".bed")
BedRecordList.fromList(List(region)).writeToFile(bedFile)
bedFile.deleteOnExit()
val sv = new SelectVariants(this)
sv.variant = useVcf
sv.out = new File(chunkDir, chunkName + ".vcf.gz")
sv.intervals :+= bedFile
sv.isIntermediate = true
add(sv)
runChunk(sv.out, chunkDir, chunkName)
}
val cv = new CatVariants(this)
cv.variant = outputVcfFiles.toList
cv.outputFile = outputVcf
add(cv)
} else runChunk(useVcf, outputDir, outputName)
addSummaryJobs()
}
protected def runChunk(file: File, chunkDir: File, chunkName: String): File = {
val vep = new VariantEffectPredictor(this)
vep.input = useVcf
vep.output = new File(outputDir, inputVCF.getName.stripSuffix(".gz").stripSuffix(".vcf") + ".vep.vcf")
vep.input = file
vep.output = new File(chunkDir, chunkName + ".vep.vcf")
vep.isIntermediate = true
add(vep)
addSummarizable(vep, "variant_effect_predictor")
val normalizer = new VepNormalizer(this)
normalizer.inputVCF = vep.output
normalizer.outputVcf = swapExt(outputDir, vep.output, ".vcf", ".normalized.vcf.gz")
normalizer.outputVcf = new File(chunkDir, chunkName + ".vep.normalized.vcf.gz")
normalizer.isIntermediate = enableScatter || gonlVcfFile.isDefined || exacVcfFile.isDefined
add(normalizer)
// Optional annotation steps, depend is some files existing in the config
val gonlVcfFile: Option[File] = config("gonl_vcf")
val exacVcfFile: Option[File] = config("exac_vcf")
var outputFile = normalizer.outputVcf
gonlVcfFile match {
......@@ -86,8 +142,9 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
val vcfWithVcf = new VcfWithVcf(this)
vcfWithVcf.input = outputFile
vcfWithVcf.secondaryVcf = gonlFile
vcfWithVcf.output = swapExt(outputDir, normalizer.outputVcf, ".vcf.gz", ".gonl.vcf.gz")
vcfWithVcf.output = swapExt(chunkDir, normalizer.outputVcf, ".vcf.gz", ".gonl.vcf.gz")
vcfWithVcf.fields ::= ("AF", "AF_gonl", None)
vcfWithVcf.isIntermediate = enableScatter || exacVcfFile.isDefined
add(vcfWithVcf)
outputFile = vcfWithVcf.output
case _ =>
......@@ -98,29 +155,31 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
val vcfWithVcf = new VcfWithVcf(this)
vcfWithVcf.input = outputFile
vcfWithVcf.secondaryVcf = exacFile
vcfWithVcf.output = swapExt(outputDir, outputFile, ".vcf.gz", ".exac.vcf.gz")
vcfWithVcf.output = swapExt(chunkDir, outputFile, ".vcf.gz", ".exac.vcf.gz")
vcfWithVcf.fields ::= ("AF", "AF_exac", None)
vcfWithVcf.isIntermediate = enableScatter
add(vcfWithVcf)
outputFile = vcfWithVcf.output
case _ =>
}
addSummaryJobs()
outputFile
}
/**
* Performs the varda import and activate for one sample
*
* @param sampleID the sampleID to be used
* @param inputVcf the input VCF
* @param gVCF the gVCF for coverage
* @param annotation: ManweDownloadAnnotateVcf object of annotated vcf
* @return
*/
def importAndActivateSample(sampleID: String, inputVcf: File,
def importAndActivateSample(sampleID: String, sampleGroups: List[String], inputVcf: File,
gVCF: File, annotation: ManweAnnotateVcf): ManweActivateAfterAnnotImport = {
val minGQ: Int = config("minimum_genome_quality", default = 20, submodule = "manwe")
val isPublic: Boolean = config("varda_is_public", default = true, submodule = "manwe")
val minGQ: Int = config("minimum_genome_quality", default = 20, namespace = "manwe")
val isPublic: Boolean = config("varda_is_public", default = true, namespace = "manwe")
val bedTrack = new GvcfToBed(this)
bedTrack.inputVcf = gVCF
......@@ -142,8 +201,8 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
add(bgzippedBed)
val singleVcf = new BcftoolsView(this)
singleVcf.input = inputVCF
singleVcf.output = swapExt(outputDir, inputVCF, ".vcf.gz", s""".$sampleID.vcf.gz""")
singleVcf.input = inputVcf
singleVcf.output = swapExt(outputDir, inputVcf, ".vcf.gz", s""".$sampleID.vcf.gz""")
singleVcf.samples = List(sampleID)
singleVcf.minAC = Some(1)
singleVcf.isIntermediate = true
......@@ -165,6 +224,7 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
imported.beds = List(bgzippedBed.output)
imported.name = Some(sampleID)
imported.public = isPublic
imported.group = sampleGroups
imported.waitToComplete = false
imported.isIntermediate = true
imported.output = swapExt(outputDir, intersected.output, ".vcf.gz", ".manwe.import")
......@@ -179,14 +239,14 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
/**
* Perform varda analysis
*
* @param vcf input vcf
* @param gVcf The gVCF to be used for coverage calculations
* @return return vcf
*/
def varda(vcf: File, gVcf: File): File = {
val annotationQueries: List[String] = config("annotation_queries", default = List("GLOBAL *"), submodule = "manwe")
//TODO: add groups!!! Need sample-specific group tags for this
val annotationQueries: List[String] = config("annotation_queries", default = List("GLOBAL *"), namespace = "manwe")
val annotate = new ManweAnnotateVcf(this)
annotate.vcf = vcf
......@@ -202,7 +262,19 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
annotatedVcf.output = swapExt(outputDir, annotate.output, ".manwe.annot", "manwe.annot.vcf.gz")
add(annotatedVcf)
val activates = sampleIds map { x => importAndActivateSample(x, vcf, gVcf, annotate) }
val activates = sampleInfo map { x =>
val maybeSampleGroup = x._2.get("varda_group") match {
case None => Some(Nil)
case Some(vals) => vals match {
case xs: List[_] => xs
.traverse[Option, String] { x => Option(x.toString).filter(_ == x) }
case otherwise => None
}
}
val sampleGroup = maybeSampleGroup
.getOrElse(throw new IllegalArgumentException("Sample tag 'varda_group' is not a list of strings"))
importAndActivateSample(x._1, sampleGroup, vcf, gVcf, annotate)
}
val finalLn = new Ln(this)
activates.foreach(x => finalLn.deps :+= x.output)
......@@ -216,7 +288,7 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
def summaryFile = new File(outputDir, "Toucan.summary.json")
def summaryFiles = Map()
def summaryFiles = Map("input_vcf" -> inputVcf, "outputVcf" -> outputVcf)
def summarySettings = Map()
}
......
@HD VN:1.4 SO:unsorted
@SQ SN:chrQ LN:16571 M5:94445ec460a68206ae9781f71697d3db UR:file:/home/ahbbollen/fake_chrQ.fa
>chrQ
TCGGCATCTAGAAACTCCAAGTCTGCAGATCTATAACACACGAGGGTATTCAGCCCTAAAGCATTGCGTCTAGGCATGGGTCTTTAATCTCGCCCCGGCAACCTCTACGCGAGCTCCTACCAGTCAACGTGATTGATCCCTCGTACCTAGCATATTCCACGGCGTTTCATAGCCTTGACGGGCACCTTATCCATTTTTACCTAGTTTGACATTAATTGCTTCAAGACTACTGCGGTACGGGTCCCTGCATCCAGAAAGGATGACAAAAAGTGTGACAGTGCGTTCAATCCATAGATTCCCGTCGGTGCGAGGGCTGAAGTTGAGCCCTACAACCCGGCGATAAAAATTGTAACCCTATGCATTGCGTCAAGTTAAGACCCTCCATCTTCGCCACGCGATAGGAATTTGAAACATCGGCGTGCTTAGAAAGGCTATTAGCGCGGGTGGCCTGATCGGTATCCGATAAAGGCGTTGCCACAAATTTTGCCGATTGACCTGCAATTACATCGGGTAAGTGCATTTCCGCCGTTAGAGTATTAAGTCATGACGTTTTCGACATGGGCATTTAATTGGCCCAAGGTGGAATATTCCTGATGTGCCTAAGTATGAGCTCCCCATGAGAATTTGAAGATCATGAAGACTTGAGTCAATTCTAAATGTAGTTACCATCACTTAAGTTCGTTCGCCTTCTGGGCAGGCTATGTCTAACGAGCGACGTCCGCAGTGTGACAATCTAGATATTGGTTGAGCAGGAATACCAAGGAGGTGCTTGAAGTTTCTCTTATGGAACCAACTTCAATCAATAGGTAGTCTCCGTTTCGTTTCCACTGAGACATTTGTGCAGTGGCACAGTATGTGGTCGTAAGCGCATGGTGTTGTTGGAACGAAGACTCTCACTTTTGTTTCCTTTGGTAGTTTAATTTAGCAGTATCCTGGTCGTTGACCAATTTGTGAATCTCCACGGTCTCTCTTTATCAGTCATACCTCTTAAACCGGCTTCCGCTCCTGCGCACATTTATATCCTACTTTCCGTCAATGTGAAAGAGAGATCAACATAATTTCGCGCACGATCTGTCCCATTTCAGACGCCATCGACGGGTCCCGCCTTCACAAAAAACCTGCCACCATGCAGTAGGTACCTCCATATGCTGAGCGTCCGTTACCGGAAGGGTTAAGTCACTGTTTAAAGGATAAGGCGAGGTTTCCCTTGGTGGTGTACTACTGCTCACGTTCCTGCTTTACTCTCCGAGTATTGTTTTAAAAGTGGGCACGCTGGAACGCAGCACCCTATAAGAAGACTCAGTTAGGTCCCCTTCACGAGCATGTTTGAGCTCTCGGACCTAAGACGTCCGAACTCGGTTACGACGGTTAAAGACAAAGCTCCCAGCATGGTATCAGAAGCCACCACGCGTGAGGATGGCGGGGCCCTGGGCACATTGGCCCCGAATTCTTCGCCCACTTAAGTCGGACTCACCACGGGAGACTACGCCCTAACCGGATAATATCTTTTAGATACCATACAGGGTGCAACCATCGCGGCGTCAGCTGAATGTGAGGACAGAGCCCCACACACAGCACTAACAGATCCATGATTTTACTTCGTTTGGGCGACGTGCGAGCCTATTCGGCCTGTCGGACTTTGTGTAGATTCGGGATTTGACCTAGCGTATAGGCCTTTGGTATACTCGAATTATCAGGGTCAAACTACCCTCAAGGAATCCTATTTCAGACGGACGTTTGCCACCCCGGAACGTTGTCGGATCGCTTCTTGCCGCAGGAGATTCATGGAGTGATAATCGCTGGACTCACAAGTGTCAGCGGACTTTCGGTGTCTTGTGGCCTACTTGCAGTGAACACCACCAAACGAGTAGTATTATGGATTGCCGGCGTGTGTTTGTGGCCATGATTGGTTGATGCGACGGCCTAGATTCTCCTACGGGAATCTACCAGGCCCAAAAGCGCTGACTTTTATGTATTTGAGGGGCCGAAATTACATAGTAACCCAGAACAAATACCCGTTAGTTATAAAGTGAGCGCATAAGTTTGGTCGATCCGGCAGTCGAACCATTGCGGTCGGACATATCCGCAGTAGTACACTAAGGCGGAATAGACTGCCGAGTCAACGCTCCCTCATTCTTGCTACCTTAGATCTCGCAGGTTCGACCATTGCTGAAGCCGCTGAATTACACGAGTTGTTTTGTTAACCCCCGGAATGTAGTTCGTACGCCTCAACTGATTCTTCAAAAGCTCACTGCACGTGACTTGTCATGTGTTCCTAAAACATACCTCATCTGTGGGTCTGGTCCCATAAGCATGGAATGTCCGTCGACGCAACATGGAAACCCACTCGCTCGCTATACGTTTATGGTGAGACAGAAACACACTGTATTAGACTGCCACTGATAGCCCCAGTAGCAAGGTGATGTGGCAGGCATGGTACCCAAACGTCTATCGTTTTGGGAACCAAGGGGAGTGCTAATAGGTCCGGCCACGTAGAATGACATAACCTCCAGAGGAGCGCAGGAGTTGATGCATTAAAAGATCCATACTAAACGTTAGCTTAATGCCTTCAAGGCACCAGCTACCTCCATGACAAGGAGATTTCGGAAGGGGTAAGATTTACTTCTGTCCCAAAAGGGTAATGACCCGTAGGGATGGAATCATTGATGAACTCTAAAGGGACTCAGCCGACTAGCCGAGAGGGCTGGACGATCATTTGATGGGAGAATACGCATACATCTAAGTGTCAAGTATTAAATCGGTAATCTCCGTAAAGGCGTGAAGTTCACAGGGCGCAGTTTCCAGTTACTCCAAGAAACTACCGGTTCAGTTATCGCTTCCGGTGCCTTCACAGCAAACATCATTCGCTCAAGAAAGTGCTGTCGCTGCGTGTGGATATTTCCCCCTCGTTCTGGAAAGTGCTCTCAGCGACGCACGTAAACATGCTGGCGACGAGAGAGGCGTCAACACGGTCCGTTACCAAACTGCGGCATTTACCACGAACCTGATTGCAAAGTGAGATTTCCGTAAGGAGGTTAGCCAAATATTACGTAGAGTGTTCCACACCAAATCCGTCGTCCACATTCGCGACGGCAGTCTAGACGTGTAATTCCCCGGATAATCCAGTTACTACATGCTGATGCAGTCATAGTGCACGCAAATGCGCAACTTAACAAGCACGACCTGAAACAGAGAACCCCTGTGTAGTCAATATAGGATGACGGACACACACACTTGCTGCTGCAATCTTACATTCTGCGAACGAGTGCAAAGTTGAAATCATGACGAACAGCCTTGCTTTTCAGAGTCTCTATCGAACTCCTTTACACCTCCATATCTACTTGCAAATCACACTAGAGGGGCGCAGCTTACTCACTGAGAGATGGTCTACCTAATCGATTTTCGGTGAACTTTGAGTACAGCATTGAGTCTGGAGGGTTCCACTACTTTATCGTACCGGTCCGACATGATTTCTTATCGAATAGATGTTGAGATGGACATTAATAAGCATAGTACGTCTCGATCGATGGCTACCTTTACGTCTATGAGTGCTTACATAAGGTCTCTCGTAAGTCATGGTCCCGCGGGGCTCGCGCAACATTGTGGATTAATGACTCCAGTGACGCATGTTCGATTCGCATGAAGTAGGTGGCGCGTATTCATACATGAATAGTAGGCAGAACGAGCACATTGGACCGATCTTGGAGGTTGGGCTTGAGGTCCCGCACTGATAGTTTACGGCCATGAAGACGACAATTGTCAATACTTCTCTATCCTGAGCGAATGCTGACGGGGGCCAGGCGGAAAAGTGCGACACAGTCTACTGATTACGTGTAGTACGTGGCTAAGCATATTCCCGGGTTCGCAAACATAGGTCTCTGATGGGGTATGGGTAAGAAATCTGAAGGTTGCGTCTCTCACCACGGTCAGGATACCGAATCAGCTCATAAGAGTTACAAACGCGCAAATGAAGGCCTAGTCCACAGGGGTGTCATTCGCACGAGCTGGGCTTAGAATCACTGTTTTCGGTCGCACCTGTAGGGGGACATGGGACGGATTTCACAACAAAAAAGCATCTCTCATGATTCGATTCAGATAGAGGAGAGGAGGTAAATGCCAACAAATCTATTCTTTAGTACCGCCTGACGGAACGTATTTAATCCTCGCCTCAGATCGACACCGCAGGGTAGCTGAAGACGTCCGTTCTTAGACATTTAGTCGATCATTTGTTATGAAACAGAACTAGGAGTCCGTGCCCTTCAGGCCGGCTCAGGGGCACCTACCTCCAGATCGCCCAGGTTGGGTTTATTAGGCGCCGAAAAGTTACTGCCCTATCAGCCCCTCCAATCCGACCTACGGACATCATCCCACTGGCTCGCAAAATATAAATTGCGGATGGGAAAGGATAAGGAAAATCATTACCTACACAGAAGGACAATGTCAGTTCCAAATAACACTGATACTTTCGGAGCAACTTGGTCCGGAAATGTAAGTACGACTATAGCCCTTTCGACCAACGCCGACAGTCCTATTTGGACGCCGAGAGAGGCGACGGGTAGCCGAATGTAAAGCTCTCGGGTCGCTCTTGGCGGAATGCGCTGCGGGTCCTACCCTAAACCCTTACCACCACCAACTTCGTTAGGAGCCGTATAGATTACAGCTCCCGCAAAATTAGAGAGGAATCTGAGTTATTAGCTGAGGACCCCGCATTTTCTGCGACGGCGTAGCTGCAGTGACGTACGATATGAGTTCCCGACTGTGAGGGAGTCCCAGTCGTGACTCCCTACAACGGCTCCAGATATTGTTACTTATGGTCAATATGCCCCGACCGCCCATTGTCTCGAGTACAGTCTTCCCCAAAGTTAAGCTGTGCATTACCTTACCGTTTTAGGTCCAGCTGGTAGCACCGAATGCTGCGCAATCCGAGCCCCCGAAATAGACTACGTGTCCACGGTCAATTGTCATGGGTAGCAGAGCTCAAAGAGGAGAAACGTGCCCCGTAAACCTATTAGATCTCGGTTGATAAATATCAGGCCACAGCAGGCTGCCCGATGCTTGTTTGAACAACAACTTCGGGAGCCGCGGTCCTTGGTTCTCCCGATATTCGGCCGCACCGAACGGTACGCGTCATCGCGAGGTGCGTTCTCGCAGCAAGAAATATTTGTTGTTGTTGTCTTCCTTCCGCATAGGAAACCTTAAGCGGTACCTTTCTACGAAGTTGAACCCTAGAAGCACGTGTAACAATTTTTTTTACGCTACACCCGGATCTGCTTCCATCTGTTGATCATATGAGCCTAATGTGACTAATCTGTGCCGTCGATTGAAAATTCGTTCTGAACCTAATCACATGAATTAAAATTAGGGCGAGAATTGGCTCCTTTTGGGCCGTAATCCTTCAAAGGGTTAACCGAATTTAGCCTCCACGGTGACACAAACTCCCATAGGTAAGGCAAACCCAATAACGAGGAAGCCTTGCCCACAGCATGTTTGATAAATACCCTTAGGGTAATATCGCGTGCAATACTGAAGCCGCTCTTCTAGCATCCGTGTTTGACATACTATGACCTTGAAGCCTGCCGCAGCTTCTAGGTCATCCAAGTAGATCAAAACGCCATGTTGTGGATCCATGCATCTTCCCAGTGAACATGGATCTTAGTGTGACAGGCGAGGAGCGGCGAACACTATCGGTGTGGCAAGCTCGGGCCTTCGTACGTTGTGGAAGTATGCGAATAAGGAGACCGTAATGTATCAAGTTCTTAAGAGCCTTGGTACCGTTGCAATTCGGCATGTTCCTACAGAGACACTCCGTGTTTGTCATCCGTCATAGATCTATGGCGTAGTTAGCGCCTCTGAAGTAGTTGTCCATTCAGCAGGCATTGCTTAGGGAGTTTCTGGCGCTTGCCGCTCAAGATGCTCACGGGCCTAAGTAGCACGGCAACCTTTTGACAAAGCATTTTATAAACTGAGCATATTGGCCCGAAACTAATCCAGCAAAGGGTGAAGACCTGTCAGCGGGCCCAGAGTGTGAACGGTCTACTGCGCGGTACATAAGTGGCGTAATCCATCAACAAGACCTACACGACCTGAATGATTTCCAACAACTTTATATGCTTTTCCGCATCTCGAGAGTACCGGAATCTATGCAATCTCCCAAGGATCCGTAGATTTGAAATTCAATCCGACGGGGTAAGGTTGCCGCGCCGGTTAGCTAATGTGCGGATTTATAGTCTTTTTCCCAGAAGGCGTAGTTAGTTTCGCACCTAACTACGACACATACTTGGGTCGACTGTTGAAGGTGGTAAGTTGCGAGCAGTCCGCCGCTCTCACGCGCCGAACCACGTTCATATCGGCAAAGTTGCGCGATGACCTATAGGTGTGCAAAGCTCGTCCGACATTGGGATTGGATTCACGTACATACGTTAGTATCATGGGTAAGCTTCCATGTCAGCCTCGTGTATAGCACCGGTGCGCCGCGCGTTAAGGATTCTATGCCCAGCAAATGTGCCAACGTTGTGGGGAGAAAAGTGTAGTTGGATGCGATCGTGACATCGGCACACCGAAACTCTGCAGCCAGTCCCGCTAATCTCATTGGCACCGGGTAAGAGATTACCTTTGGTTAGGAATCGCGTGCGACGTACTGCACGAAAACAGTGCCTGAACCGAGGTGTTTACTTAGATGGTTCTAGACCCAGCATGTTCCTCACTGGAACCTGACGTCGGTACGTGATCCTCTATACCTCCTTTTCGGTATTGGCCTGGCAGCTACTCTAACTGTTTGGGCCGCGCCGATTTCTCGAGTCCACACGGCGAGGTCAGCAAAATTGCCAGTTAGTGGATGTTGGGATCTCAACGCATTACCATGAGAGTTCTTGGTTTACCCGTTAACATCGCTGCGCACGGTGTGAAAAGCCTGTTTCTTTGGCCCCCATCATCTTCGGCCCGCAGATCTCAGATCAATGATGTAAGGTTGCGGCGGCAAAGACTAGACTTGAGTCGTGAGATGGTGCTTTGCTGAGGCCGTCTCCTATAGCTTATTCTAGGACTTTCCGCAAACCACCCGACGTGCGGCTGTCCACGATCGGATTCCATTCTGTCTCGGAGCATACAGCACTAGATTTGCCGCTTGAAAAATGTTCCATAACCATGATTTCAACCCCATCTAGTCGGCAGGCACAGCTGAGAACAGCGAAGGGCGTCGTGAAGGGCATTGCCCGTAGTGTTTCAGACGTGCTAGAGACTAAATCAACTATCTGCACTCGTAGCCTGGCGTGTGAGATGTCACCACGATGTGCCTAGAGGAGTGATTATGAACATGTATTACCACGTCCGGGTGTCGACGGCTATATGGCTAACATTTCTTATGGCTAGACGTGCTTGGAAAGGTTCCCCAGCCTTCTGTTTCCCGGTGCTTTCCACGAGTCTGGAGTTCTGGTAATTAACTACATGGCGTTAACGCGGAGGTAACCCCCAGTCATTGCATTGCAGGTAGGGCTTAGGTGCAATATAATTCACCAAGGCGCGGATTCCTCACGATTGTTACGAAGACACCCGGAGGGTTTCAGTATGGCTTGAGAAGTGTACGTTTTTCCGGCCAGGGTGTAACTATAACCAACACATGTTTGGCCACGGGCTAAGTCGGTCCGCACGACTGATTTCCCCCGCCCATGTGTTTGGGAGCAATAAACTGCGTCTGCCAAGAGTAACAACTCGAGTAGAGAAGGGAAGTCTCAGACTATTTTGCAAATCAGACTGTAAGGCTCAACAGCCATACAGCTTGCCCTACTACTGAATACTAGCGTAGCGTGGCCACATAGGAAAGACTTCATGTCTTCTAATAACCTTTTACCTCCAACGTCCCCGCCGTCTTCACGCGGTCCAACGATGAGGAAACAACCACCCCTATCTTCCGCGGAGTGGTTCACACGACCCCCGGCGTTAACGCGCACGTTGTTGTCTTTCGGGACGGCACTACCCCCAAATGCCCAGACCCAGTGCTAGCGATATTCAAACGCCGTCCGGTAAGTCCTGACGTTTTTCAACTGGATGCACTGGCGACACGTAGTTCGCAAGGCGTCCATGAGAGGTTTTAACCGTCATGTTTCCGTATCACGTCTTATGTCTGTCTCTATTCTCAGCGAAATTCTCATCATAGGGCGGAGACTATCTGAAGGCCAGCGAATACAAGATTTAATATCAAATATAGCATGGGGGCCAACAGAGGCCCCCCTGGTGCTGACGAATTATCGTGATATTAGTACAGCTGTCTGCAATGCCATTTCGAAGGCTTTTTGTTCGTATCACTGCTCTATGCATAGCGGTCACTATGACCTCTCAGCTTGACTCACCCGAATGACCAATTGTGGTCCAGCACTCCCTCATCTTCCCCCATTAACGATACGTTGGGCACCATCGGTGTGAGCTACCCGTTACAGTCATAGAATCGTTCTTTGCGTTGTACGCGGCACGGAGGTGACCGGGAAAAGCGCCGCGAAGGCCCCGCACTGAATAAAGCTAGTATTAGCGTCTGTCAAAGTGTTTTGACACCTAATTCGCTTCCAAGTCCCAATATCTAATCTAGCCTGCTTTGGGCCAACATCTCATTGCGTTATGCTAATGAAGAGGGTGCGGGATCACATCCGCTCTTCTCTTCCTATACACAGCGGACATTCGGGTTGGACGTTTGGAGTGATAATTTATCGTTAGGGATAAGTATGTCGGCGCTTAGTAGTATAGCCCGCTGACCAGCGTTCGATTTCGAACCTTACTGGACATTCTCAATAACTACTGATCATGACGTTTTCCTCAGTTCCTAGCCTTGACAACTAGCCACAGTCAGCATGGTAGAGAGCGTTGAGCCGGGGATAGCCAGGCTATTAAGACAAAGACCCTCGGGCCCCTTAATGCGCGTCAAGTCTGACGGTTTGAGTGCGGAGCAGTAAGCGCTTTGGTATAACCGTGACGTAGCAGATCCATGCTTCGCCCGCTTCCACCTGAGAGATACTAGCCTCTTTCGCACTTTGTAGGATTACGGGCAGCGAAATATTTATCCTGTGCGGCGAGCCCGCTTCGGTTTCGAGCTCTATCAGTGCGCGGTTGGCACTCCAACGCACGATAACATATACCCGCCCACAAGGCCATGCAGGTTTAACCTCCTATTCTGATTGTACCTGGCTGACTTTACGGTACCCACCAGCGCAGGATTAATAGCCTAATTATGCTAACCGGTGCTAGTCTAACTGCTGTTACTAGTCCGCCCCAGCTACCCCACGGGTCAGTAACTGCACCAGCAAGCATGGTTCTCCTCCTGAAGTTGTACGTTCGAGAACCCCGTATCGAGTTGGTATATAAATTAAGGGTTGTCTAAAACAGAAGCCTATTCCGCTATCATCGGTGTAATAACTGATCGCGCCGTGGTTAAATGGAGGAGCACCCGCATGGATACATCGCTAGCGTCTTGTAACTCTCTGGGGGCCTAGTATGGAACGGAACAATGACATCATTGCTTACGGGGCCCGCACTTAGCTGTCGCGTATCGCAAATCATATGGCATGTCAGTCCCGACATCACGAAAATGACCCCATCTGAGGTGGTCGGGAGGCGAACAGTCGAATATGATGTATGCACCCGCAACTTAATGTTCAAAGGCGGGCGAAATGCCTTCTCCCGTCCGGACTATCCTGAGTGCTAGCCGCGAGTCTGTAAAGGTTGACGCAACCATATAGCACGCAGAAAAATCACTCTCACACCATGAGAACCATGGCGGCACGCTGTCTACTTTGTCTGACAGGCTACGGAAGGAATGGTACATACGTACAAACGGATGATATGATATCGGTCATTGCCTATTGTGACGCTACCCTACTGCATCACCCCCTTAGAATGCGTTGGACGCTCTATAGCAGATCCTCCATCCAGTGGAAGTCTCGTCGCCGTGGTTTGCCTTAACGACCGTTGGAGAGAGCAGGACAGAAATATCGCCCTTTTGAGCGCATTATTTGGAATCGAGGTAAGTCAGTGCGGCATAATCGCGCCTCGTGAGCGGAACAGTTTTTGATCCCACCCGCTAAATGCCAAGGTGCTGTAACCTGGGCGCGACACCAAAAGACCACGTGCTGTATGAAGCATGTGTTCTAGCGCACTCTCAACCGTTACCCCGAGAGTAAAATGTTAGTTGTAGGCCGATTCTGCAATGGTAATTGGCGGAGTGTCTAGGGAAATGTTTCGGTCATACTTAACCGGCTACCTCTTCCTCCCTCAGATTCGGTCTGAGATGAGATATACTGGGTGAGTTGAGTCGCCCTGTATCGTTGCGGCGCTCGTGGACCAGACAGACAGTTCCCGTTTATCTCTGCTTCTAGATGGAGGGTCGCCTCCGTGTTAACGCCGGCGAAGGTAGTCGCAGCTGAAGTTGTGATGCACAATCAGGTGAGCCTTTTAAGTATGGTCCTACGGACGTGAACAGCTGGGCCCAGTCATTTAGTACGGGGGGTTTACCTATAAGGATACGGTAAGAACGTCATCTATCCGTCCCACTGGAGTCCGAGGGGTTCGTGTCTACACGGATTACTTATCATGCACACACGTCTACGGTCATGCATAAAGTTGTGCAGCGCAGCAATCGGAGCGGAGTTACACCATCTCCCTATTAACAAGGCACTTATTAGTACTTACCCCGTTATAGAGCTCTCATCTTATCGATAGAGCGCAGTCCTAAGTATTGGCTCGAGTGATTCGCTCCTCAGCCCTTGATTGTAACTCCCCCGATTGCAGGTTGTATGGTGAGTAAAATCTCTGCGCCCTTCTGTTCGGATAAAGAACCCCGACCACTAATGCCCGCCTGCTTGTTGGGCGGTAAATGGGTAACGGAACATGGACTATGAGTGCGATGATGGTCAATAGAATTACCTTATTACGCAGTAAAAGGAATGACGCAGACAGGTATTTGTCGACGATTGCTTCGAACCTGGCAAAATGGGGAGGTATCCTGTCATGTTCATCTGTAAAACAACTCCTGCCTCTTCGTAGAGGACACACACTGTGGGCCTTTAGCCTTTAGCAGCCCATTGGGGCTTACCAGCTGTCGTCATGGGGTATCATTAAGATCCATGCGCCCCCGAAACTTACTGCAAAACAATATGGCTTAAAGGTAAAGGGACCATCAGGAGAATGCTTAAGAGCGACATATAGATACGTATTTAATTAATTTATGTTAACGCAACCATCTCGCAGGAGTCGCATAGCATATTGCCGGGTGATAGTTAATGCACTGTGCTTCCGTGTTTATATAAAATAAGCAGTAACCTCTGACAGGTTGAGACTCCAACAAGTGCTCCGGGTATTTACCTTCTACCATGGCGTTCTAATATCACGAAAGAGAAATTGTGTGTACCGATGCCAGGTGACCGCCCGCGTGCGCCAACGACGCAATCTAGAGCATCCACGCTGAATTGGGGAACTCTTGCCGTTCGTCGCATGGTGTACTTGGTACCACTCGATATGCCTGATTAGGTTTGGCCGTAGCACGTAAGGTAGTGACTTTCCATTCAAGCTAGCGAAGCGACACCACCACAGTGCCCGGTCAAAATAACCCACACCTGGCCAGCATAGAGGCTAAAATAGCTACAGTGCGCTAATCGAGTGTTTTTGCATCGGCTCGTGGCTGGTGGACTCGGGACAGCTTAGAACTAACTCTGGTGTACAAACGCGATCGTAGCTCTCGCGACTTACTCACCGGAGTAGGTTAGATGGACAAGACCTAACCCGAAGCCTAAATCGCCCTGAGTGTTAGCCGCCATTCAATTCTATGGTTTATCGGGGGCGTCTATGGCTGCGACAGTATGGAGGCCCGTTATGGGCACCCGAGTATCGTACCATAGTAATCCCATATTCCTCTTCGAGCGACTATTGGATCAACATACCTACAGGGTAGTATGAATGTTCTTGATTACAGAAACCATGGAATCGGCGCATTCTATGTTTCACTTCCGAATAACAGTGAGCAAGGCATGCCCTTGACAAGGATCATCCCGACAGCAAGCCGATCGGGCCCTAGAGCCCGACCCCCAAACAGAACACCGGCCACGTAGTTGCTGGGACTAAACAAAGGTGTGTTTCCATAAAAGGAAATCTTCAAGTGTATTGTTGAGTCGTAACGCTTATATTTATGGCCCAATGGGCGTTGCGAGCACAGTAGCAGGCCTAGATGAATGCCTAGGCCACGATCGGGGGGAGGCTCATTGAACGTACTGCCATACCAAGCCCCCGTATGCTATGGCAGGAGGGGTTCTCTTCGTATAGAGCGAGGGTCTCTACGCCAAGCAGCATTCCCGTGTTGGGTGGCCAATGGGGCTCACTAGAAACTCGGTTTTTTTAGCGAAGGAATGAGCAAACTCGTGAAAGGTGGTACACACCAGTTGCGGCCGATTTGTTGTAGCAACAAGGTTTGAAGAATTGAGTAGATGGGCCAATTTACCTCCTATTTAGCGAGTGAGATGGCGCATGTTTATTCAGACTCCATGTGGGGTAGAGGCTAATCGTTTAGTAGCAATAACCCCGCGGGGCAAGAGACCGTAATAACTTGAATCTGTGGTAGCTATGAATATGTGCTTCGCCCTAAGTGTTATGTAACAAGAGTGATCCAGGGGCTCAGATCACACTTAGTACGATCCGCTACTGAAATGCGGCCGCGGGCTTGCACGCTGGACATAAGTCGGATAATCAATTGCCTACGACAGGTTCAGCCATAAGGCTTGGCTCCTAACACACTCATGATGTCTGGCTTTTACTCGTGCCCGGACATAAACGTATGCTCAAACGCGAGACAGGGGAGGGTCAGCACCGTTTAGATCTATAAGGCCTACCGGTAATATGGATCGACAACAAACAGATGCTATAGGGATACCTACTCCTTTGGACCCACATGTAGATGAAGGCAAACACGCAGAGCAAAGGAGAGTAGTCCACCCGGTATAAGTTTGTGCTTTGAATTCTGGCTACGCAGACTTGCACTCTGTCCCGGCATTCACTATACTTCTCCGGAAGTCCTTTAAGAAATGTCCGCGCTCATGTGGTTCCCGTTGCTCAGGGGCCAACTCAAGTAGATCTTTAAGGCGCAGTCGACCACAGGCTACTAGATACGAGTTATACTTATCCGGACATCTGGCTAAATACTTGGATACGATACTTCCCCAGTCGTGAGAACGAAGCTAATACAGATCGAATTTCGATGGTTCAGGCAGGCAGTTCTCAGGAGGCAAGGTGTTAAATAGTTTCGGAGGCTCTTTCGTACGATCAGGGTCTACTACCCTAGGGCATTTTGACTTTGGATTAAATATGCAAAATGCAAGGCCGATTGTGATCAGTACTGATACTCCAACTGGACCACCTTCAGACCCTTCGAGGGGACCTAGACGACGGGAACCCTTCCAGCGGGTGATACCAGTTAGAGCAAGTCACAAACACGATTCAGCCCCCGGGGTTTATGACGTACCATGCGAGTAATAATGCACGTATACGGAGCTCTTCCACCGAGCGATGGCATTTCGGGGCGAGGTAGTTGTCTTTCATTGGCATCGCACAACCCCCATCCTCTTAATTGGCATCGTCTCCAGCTGGAAAGAATTTGAGTGAGCATGTCGCCCCTATTATTCCGTTGCCAATAAAGTGTCTCAACTTTTGGCGAAGGTTTTAACGCATACAAGGAGAAGCCGCGAGACGTCTGTACCGCTGATCTGGACGCAAAGTGCTCGGACTGCCGCTGAGTTATCCTGGACGCCATGATTAGAGCCGTCGTCACTACCTGCATACATGGGCCGATAGAGTACTGCAACCAACAACTCACTTAAGCTCCACAACGGCTGGACACTTCCGAGAGCGGTCTTACACAAACGTTAGGTCCTGGGCCGCCGACCTTACCGCTAGTTAGTGAGAGCCAGTTAAAATTATGAACGCTCGGAACCTTCCCAACAGTGGCCGCAGCCTTCCTTGACGCCTAGCACATCTGGTTTATACTCGGGTATGCCGTAGATCGGTAACCTAGGGAACGACCCTGTGGGTTTAACACCCGAGTGCGTAATCAAGCCTAGAGGCCATCTCAACTCGAGAGGTCTCCTGACAAAGAGGCGCCCGATGAATCATCCAGAGGCGTCTGGCGGTCCTACGAGAGTGGCTTTGGATGCCTGCCCCTTGGATGGATCTGTCTTTAATCGGCGCCAATACCTAGCACTGCTAGGCTCCAGACTGTGTTTACATGCCGTAACCCTGATACTCGCAGAAACGTTGCTGGAAATTCCTAGCAGCTGAAACCATTCCCCGTAACGTACTAGTACGCTAAGAGAGAGTCTCTCCTGGCCCTGATGAGTGTGTTCTCATCTGGGGCACGATACAAGAATCGGAACGAACGCAATGCCGAAGTCCCTTGTACCTTAATTTGGGCGACGCAGATAGACCCAAAGATCGCGGACTACGGAAACTAGCATAGGACCTGTGTCGAGAAGGTTCGAGCAGGTAGTGACACGCAGCGCGGTGGCCGGCGGGGTGGCACATTGCGGGTCAATACTGGTAGTAGCCACTCTTTGGACATAGCGGCGGACCAGCGCCTAGAATGTCTCATTCTCATTTTGTTCCGTGGCACGTTACGTAATGACGGCCCGCCAGCACCTGTGTATGGACTTGTAGCTCGGGCCTCTGGTCCTGGCACGACAAGGCACCAGCCAGTAATCTCTCCTAAGGCGCTAGCGTGCATAGCGCGTCTGCCTACCGCCAGAGAACGCGTCATCTGCAAGACGTCCCAGCGTAGTGAATTGTAACTGCAAGCGTTCTCTTACGGTCATAGTGCCGATTTTGAGCAGTAATGGAAGCAGCAAAATGCCGCCCAAGCGATTCGCAAACTTCTAACAGAGCTACAGCCGGACACGACGCGGTGGTGCTCGCGGTTGGTGATCTTATGATATTAACGCCCATAGCGGCCATCTTAATCGACACCATGTTCGTTTTGGCAGGCCTTGTGGTAAACACGTGCTAGTGGCACCACCCATGCCCGTGCCCATACATCCAAACCGAGAGAAAGCCTATTTAAGCGAAAACCACAACTTCGAGGTTTCACCCCCTGCCATTGATAAAGCGAGGAGTACCCCCGATGCCGGGAAGCGTCCGCACCCATTTCTTTCGTTCTGGAATCCTCGGGCGACTTCTCGAAGATACTGTGCTCACGACCTGGAGTATCATGAACAATCGGAGGAAAATGAGTAATTGTCGAGTCGTTGTTAGACGGCACTTCCGTCCGGCCCAACTGTTCTCGGATACGTGTCCCGTGGTCAATGCTCTAAACCGGCTGCCGGCGACTCAGTTCACTGAGACAAATTCTGATGCTTTCGAAGCAAGGATGCGCCCAGAGCAGAGCTGCCCAGATGAGGTTAAGAACGTAACTATAATCGATCAGCCATTCGGCTTAAGGGGCCCCGGCGAAACGCGAAACACTTGGCACATGGACGCTTCACGCGCAACAGTAGTTGTCTCTTTCGTGAGCCACCGTAGCAGCTAGAAAGGCCTATCCAGTGATGCTTTATGACTGAGTGTCGAATCTAGGTATAGCATAGACTGGCTGATCGGGCGGGTCGGCCCACCCGTCTCGGTCGAGCGGTTCTGACTTTGGGTGGCTGTGTGAACCCAACTGCAGATGGAGTTGAATGGGTACACCCTATGCGAGGCCTCGTCTTTACACCAAATCGGGGCCCTGTGAAGTGCCACTCTTTTCCAGCCGGCAGCCGCTCAGTCTGATTTTGCTTGTACATGTCGTGTGCGAACGTTCCGGGAGGCTTCCGTGTTCCAAATACCGTGTTCTCATATTCGGTCCATCTACCGACGGAGAGTTGGGATGCCCGGGCCCGGAAATATAATTTAAACTCGTGGCCAAGAATTTAGCATGTTGTAAACATGAGAGACAGGGCCGGGCTAAAACATTACCCCTGAGTAATGTAGAGCCACAACTGAACATAACATTGGGATCTAACGCACGCAATCAGTGTAGCTTCAGCCCACCCTCTAAATTTCCCCCGGACAACTGGATTATCACCTGCGTCACGCGATAATTGCTCGCATCTCACCAACACACTTCGACAAATCTGGAGTCTCCCTGGTCCGTACGTCCAAAACCGTTTAAATGGGCGGGTGTGTCGTGAACCAATCTCCTCTTCCATTTGTCACATACTGGCGATGACATCCTTTTACTTGAATTATTCATCCGGGCACCAGCCGCTTTCCCTACGATCCCCGACACTCGGGGCTTCGGGAGTTGCCCGCCAAAAAACCGACAAACCAAACTATACAATCAATCCCATCTAGATGTAGGGGACTGAGGCTCTAAGCTATGCGCCTACTATACTTTGTAGGTATCAAACTACGCTTGAAGATAGTTGATAAGGAAGCGAATTGATCGAGTACCGTATCTTCAGTCCGACTCCCGTTCGAACGCAGCACGCTAACATGGTCCACTGGCATTCTTACTAAATACCTAGTTCACTTCTACATGAGGAGTGTCTGGGCCGGACTCACCTTTGATTAGATAACTGAAG
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster which is running
# SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
# should also be able to execute Biopet tools and pipelines.
#
# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
# license; For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
# Set root logger level to DEBUG and its only appender to A1.
log4j.rootLogger=ERROR, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n
\ No newline at end of file
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.toucan
import java.io.File
import java.nio.file.Paths
import com.google.common.io.Files
import nl.lumc.sasc.biopet.extensions.VariantEffectPredictor
import nl.lumc.sasc.biopet.extensions.tools.VcfWithVcf
import nl.lumc.sasc.biopet.utils.config.Config
import org.broadinstitute.gatk.queue.QSettings
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
/**
* Created by pjvan_thof on 4/11/16.
*/
class ToucanTest extends TestNGSuite with Matchers {
def initPipeline(map: Map[String, Any]): Toucan = {
new Toucan {
override def configNamespace = "toucan"
override def globalConfig = new Config(map)
qSettings = new QSettings
qSettings.runName = "test"
}
}
@Test
def testDefault(): Unit = {
val pipeline = initPipeline(ToucanTest.config)
pipeline.inputVcf = new File(ToucanTest.resourcePath("/chrQ2.vcf.gz"))
pipeline.script()
pipeline.functions.count(_.isInstanceOf[VariantEffectPredictor]) shouldBe 1
pipeline.functions.count(_.isInstanceOf[VcfWithVcf]) shouldBe 0
}
@Test
def testBinning(): Unit = {
val pipeline = initPipeline(ToucanTest.config ++ Map("bin_size" -> 4000, "min_scatter_genome_size" -> 1000))
pipeline.inputVcf = new File(ToucanTest.resourcePath("/chrQ2.vcf.gz"))
pipeline.script()
pipeline.functions.count(_.isInstanceOf[VariantEffectPredictor]) shouldBe 4
pipeline.functions.count(_.isInstanceOf[VcfWithVcf]) shouldBe 0
}
@Test
def testGonl(): Unit = {
val pipeline = initPipeline(ToucanTest.config ++ Map("gonl_vcf" -> ToucanTest.gonlVcfFile))
pipeline.inputVcf = new File(ToucanTest.resourcePath("/chrQ2.vcf.gz"))
pipeline.script()
pipeline.functions.count(_.isInstanceOf[VariantEffectPredictor]) shouldBe 1
pipeline.functions.count(_.isInstanceOf[VcfWithVcf]) shouldBe 1
}
@Test
def testExac(): Unit = {
val pipeline = initPipeline(ToucanTest.config ++ Map("exac_vcf" -> ToucanTest.exacVcfFile))
pipeline.inputVcf = new File(ToucanTest.resourcePath("/chrQ2.vcf.gz"))
pipeline.script()
pipeline.functions.count(_.isInstanceOf[VariantEffectPredictor]) shouldBe 1
pipeline.functions.count(_.isInstanceOf[VcfWithVcf]) shouldBe 1
}
@Test
def testVarda(): Unit = {
val pipeline = initPipeline(ToucanTest.config ++ Map("use_varda" -> true))
val gvcfFile = File.createTempFile("bla.", ".g.vcf")
pipeline.inputVcf = new File(ToucanTest.resourcePath("/chrQ2.vcf.gz"))
pipeline.inputGvcf = Some(gvcfFile)
pipeline.script()
pipeline.functions.count(_.isInstanceOf[VariantEffectPredictor]) shouldBe 1
pipeline.functions.count(_.isInstanceOf[VcfWithVcf]) shouldBe 0
}
}
object ToucanTest {
private def resourcePath(p: String): String = {
Paths.get(getClass.getResource(p).toURI).toString
}
val outputDir = Files.createTempDir()
outputDir.deleteOnExit()
val gonlVcfFile: File = File.createTempFile("gonl.", ".vcf.gz")
gonlVcfFile.deleteOnExit()
val exacVcfFile: File = File.createTempFile("exac.", ".vcf.gz")
exacVcfFile.deleteOnExit()
val config = Map(
"reference_fasta" -> resourcePath("/fake_chrQ.fa"),
"output_dir" -> outputDir,
"gatk_jar" -> "test",
"varianteffectpredictor" -> Map(
"vep_script" -> "test",
"cache" -> true,
"dir" -> "test"
),
"varda_root" -> "test",
"varda_token" -> "test",
"bcftools" -> Map("exe" -> "test"),
"bedtools" -> Map("exe" -> "test"),
"manwe" -> Map("exe" -> "test"),
"bgzip" -> Map("exe" -> "test")
)
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment