Commit 08448483 authored by pjvan_thof

WIP

parent 88d51c42
pom.xml (maven-shade-plugin configuration):

@@ -163,6 +163,12 @@
             <exclude>META-INF/*.RSA</exclude>
           </excludes>
         </filter>
+        <filter>
+          <artifact>javax.servlet:servlet-api:2.5</artifact>
+          <excludes>
+            <exclude>*.*</exclude>
+          </excludes>
+        </filter>
       </filters>
     </configuration>
     <executions>
...
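This new filter empties the shaded copy of the old javax.servlet:servlet-api:2.5 artifact, presumably so it cannot clash with the javax.servlet-api 3.1.0 dependency added further down. One caveat, assuming the shade plugin's usual Ant-style matching applies: `*` does not cross directory boundaries, so `*.*` only matches top-level jar entries, and classes under javax/servlet/ would survive; `**` is the usual pattern for excluding an artifact's entire contents.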
...@@ -7,7 +7,7 @@ import htsjdk.variant.vcf.VCFFileReader ...@@ -7,7 +7,7 @@ import htsjdk.variant.vcf.VCFFileReader
import nl.lumc.sasc.biopet.tools.vcfstats.VcfStats._ import nl.lumc.sasc.biopet.tools.vcfstats.VcfStats._
import nl.lumc.sasc.biopet.utils.{ConfigUtils, FastaUtils, ToolCommand, VcfUtils} import nl.lumc.sasc.biopet.utils.{ConfigUtils, FastaUtils, ToolCommand, VcfUtils}
import nl.lumc.sasc.biopet.utils.intervals.{BedRecord, BedRecordList} import nl.lumc.sasc.biopet.utils.intervals.{BedRecord, BedRecordList}
import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.{HashPartitioner, SparkConf, SparkContext}
import scala.collection.JavaConversions._ import scala.collection.JavaConversions._
import scala.concurrent.{Await, Future} import scala.concurrent.{Await, Future}
@@ -25,7 +25,7 @@ object VcfStatsSpark extends ToolCommand {
   class OptParser extends AbstractOptParser {
     opt[File]('I', "inputFile") required () unbounded () maxOccurs 1 valueName "<file>" action {
       (x, c) =>
-        c.copy(inputFile = x)
+        c.copy(inputFile = x.getAbsoluteFile)
     } validate { x =>
       if (x.exists) success else failure("Input VCF required")
     } text "Input VCF file (required)"
@@ -122,10 +122,17 @@ object VcfStatsSpark extends ToolCommand {
     logger.info("Init spark context")

+    val jars = ClassLoader.getSystemClassLoader
+      .asInstanceOf[URLClassLoader]
+      .getURLs
+      .map(_.getFile) ++ List(
+      "/home/pjvan_thof/src/biopet/biopet-utils/target/BiopetUtils-0.10.0-SNAPSHOT.jar",
+      "/home/pjvan_thof/src/biopet/biopet-tools/target/BiopetTools-0.10.0-SNAPSHOT.jar"
+    )
     val conf = new SparkConf()
       .setAppName(this.getClass.getSimpleName)
       .setMaster(cmdArgs.sparkMaster.getOrElse(s"local[${cmdArgs.localThreads}]"))
-      .setJars(ClassLoader.getSystemClassLoader.asInstanceOf[URLClassLoader].getURLs.map(_.getFile))
+      .setJars(jars)
     val sc = new SparkContext(conf)
     logger.info("Spark context is up")
@@ -136,41 +143,29 @@ object VcfStatsSpark extends ToolCommand {
       }).combineOverlap
       .scatter(cmdArgs.binSize)
       .flatten

-    val regionStats = sc.parallelize(regions, regions.size).groupBy(_.chr).map {
-      case (contig, records) =>
-        contig -> records.map(readBin(_, samples, cmdArgs, adInfoTags, adGenotypeTags))
-    }
-
-    val chrStats =
-      regionStats.map { case (contig, stats) => contig -> stats.reduce(_ += _) }.cache()
-
-    val contigOverlap = chrStats.map {
-      case (contig, stats) =>
-        writeOverlap(stats,
-                     _.genotypeOverlap,
-                     cmdArgs.outputDir + s"/sample_compare/contigs/$contig/genotype_overlap",
-                     samples,
-                     cmdArgs.contigSampleOverlapPlots)
-        writeOverlap(stats,
-                     _.alleleOverlap,
-                     cmdArgs.outputDir + s"/sample_compare/contigs/$contig/allele_overlap",
-                     samples,
-                     cmdArgs.contigSampleOverlapPlots)
-    }
-
-    val totalStats = chrStats.values.reduce(_ += _) // Blocking
+    val contigs = regions.map(_.chr).distinct
+
+    val regionStats = sc.parallelize(regions, regions.size).map { record =>
+      record.chr -> (readBin(record, samples, cmdArgs, adInfoTags, adGenotypeTags), record)
+    }
+
+    val f1 = (s: Stats, b: BedRecord) => s
+    val f3 = (s1: Stats, s: Stats) => s1 += s
+    val chrStats = regionStats.combineByKey(
+      createCombiner = (x: (Stats, BedRecord)) => x._1,
+      mergeValue = (x: Stats, b: (Stats, BedRecord)) => x += b._1,
+      mergeCombiners = (x: Stats, y: Stats) => x += y,
+      partitioner = new HashPartitioner(contigs.size),
+      mapSideCombine = true)
+
+    //val chrStats = regionStats.aggregateByKey(Stats.emptyStats(samples))(_ += _._1, _ += _)
+
+    val totalStats = chrStats.aggregate(Stats.emptyStats(samples))(_ += _._2, _ += _)
     //Await.ready(contigOverlap, Duration.Inf)

     val allWriter = new PrintWriter(new File(cmdArgs.outputDir, "stats.json"))
     val json = ConfigUtils.mapToJson(
-      totalStats.getAllStats(
-        FastaUtils.getCachedDict(cmdArgs.referenceFile).getSequences.map(_.getSequenceName).toList,
-        samples,
-        adGenotypeTags,
-        adInfoTags,
-        sampleDistributions))
+      totalStats.getAllStats(contigs, samples, adGenotypeTags, adInfoTags, sampleDistributions))
     allWriter.println(json.nospaces)
     allWriter.close()
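The old version grouped every per-bin Stats object by contig and reduced afterwards, shuffling all of them; the new version keys each bin by contig and uses combineByKey with mapSideCombine = true, so Stats are merged within each partition before the shuffle, with HashPartitioner(contigs.size) aiming at roughly one partition per contig. The final totals then come from aggregate instead of a per-contig reduce. (The f1 and f3 helpers appear to be unused WIP leftovers.) A minimal runnable sketch of the same pattern, with Int counters standing in for Stats and the assumption, taken from the diff, that += merges accumulators in place:

import org.apache.spark.{HashPartitioner, SparkConf, SparkContext}

object CombineByKeySketch extends App {
  val sc = new SparkContext(
    new SparkConf().setAppName("CombineByKeySketch").setMaster("local[2]"))

  // (contig, (stats, region)) pairs, shaped like the new regionStats RDD
  val regionStats = sc.parallelize(
    Seq("chr1" -> (1, "bin1"), "chr1" -> (2, "bin2"), "chr2" -> (3, "bin3")))
  val contigs = regionStats.keys.distinct().collect()

  val chrStats = regionStats.combineByKey(
    createCombiner = (x: (Int, String)) => x._1,             // first value seeds the per-key accumulator
    mergeValue = (acc: Int, x: (Int, String)) => acc + x._1, // fold further values into it
    mergeCombiners = (a: Int, b: Int) => a + b,              // merge accumulators across partitions
    partitioner = new HashPartitioner(contigs.length),       // roughly one partition per contig
    mapSideCombine = true)                                   // pre-aggregate before shuffling

  // Like the new totalStats line: fold all per-contig results into one value
  val total = chrStats.aggregate(0)(_ + _._2, _ + _)
  println(s"${chrStats.collect().toList} total=$total")
  sc.stop()
}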
@@ -185,6 +180,8 @@ object VcfStatsSpark extends ToolCommand {
       cmdArgs.outputDir + "/sample_compare/allele_overlap",
       samples)

+    Thread.sleep(1000000)
+
     sc.stop
     logger.info("Done")
   }
...
pom.xml (dependencies):

@@ -34,6 +34,17 @@
       <groupId>com.typesafe.play</groupId>
       <artifactId>play-ws_2.11</artifactId>
       <version>2.5.15</version>
+      <exclusions>
+        <exclusion>
+          <groupId>javax.servlet</groupId>
+          <artifactId>javax.servlet-api</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>javax.servlet</groupId>
+      <artifactId>javax.servlet-api</artifactId>
+      <version>3.1.0</version>
     </dependency>
     <dependency>
       <groupId>org.slf4j</groupId>
...
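Taken together with the shade-plugin filter above, these pom changes settle the servlet API on a single version: the javax.servlet-api that play-ws pulls in transitively is excluded, version 3.1.0 is pinned explicitly, and the contents of the old servlet-api 2.5 jar are stripped from the shaded artifact. The "WIP" message leaves the motivation unstated, but this is the usual shape of a fix for duplicate javax.servlet classes when play-ws and Spark end up on one classpath.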