Commit 4548f60c authored by bow
Browse files

Revert back to JDK 1.7 due to regression in GATK 3.3

parent b7521058
<component name="libraryTable">
<library name="Maven: com.baqend:bloom-filter:1.02">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/com/baqend/bloom-filter/1.02/bloom-filter-1.02.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/com/baqend/bloom-filter/1.02/bloom-filter-1.02-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/com/baqend/bloom-filter/1.02/bloom-filter-1.02-sources.jar!/" />
</SOURCES>
</library>
</component>
\ No newline at end of file
<component name="libraryTable">
<library name="Maven: com.google.code.gson:gson:2.2.4">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/com/google/code/gson/gson/2.2.4/gson-2.2.4.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/com/google/code/gson/gson/2.2.4/gson-2.2.4-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/com/google/code/gson/gson/2.2.4/gson-2.2.4-sources.jar!/" />
</SOURCES>
</library>
</component>
\ No newline at end of file
<component name="libraryTable">
<library name="Maven: com.google.guava:guava:17.0">
<library name="Maven: com.google.guava:guava:18.0">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/com/google/guava/guava/17.0/guava-17.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/google/guava/guava/18.0/guava-18.0.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/com/google/guava/guava/17.0/guava-17.0-javadoc.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/google/guava/guava/18.0/guava-18.0-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/com/google/guava/guava/17.0/guava-17.0-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/google/guava/guava/18.0/guava-18.0-sources.jar!/" />
</SOURCES>
</library>
</component>
\ No newline at end of file
<component name="libraryTable">
<library name="Maven: org.apache.commons:commons-pool2:2.2">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-pool2/2.2/commons-pool2-2.2.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-pool2/2.2/commons-pool2-2.2-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-pool2/2.2/commons-pool2-2.2-sources.jar!/" />
</SOURCES>
</library>
</component>
\ No newline at end of file
<component name="libraryTable">
<library name="Maven: redis.clients:jedis:2.5.1">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/redis/clients/jedis/2.5.1/jedis-2.5.1.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/redis/clients/jedis/2.5.1/jedis-2.5.1-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/redis/clients/jedis/2.5.1/jedis-2.5.1-sources.jar!/" />
</SOURCES>
</library>
</component>
\ No newline at end of file
......@@ -10,7 +10,7 @@
</list>
</option>
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_6" assert-keyword="true" jdk-15="true" project-jdk-name="1.8" project-jdk-type="JavaSDK">
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_6" assert-keyword="true" jdk-15="true" project-jdk-name="1.7" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>
......
......@@ -21,6 +21,7 @@
<sourceFolder url="file://$MODULE_DIR$/src/main/scala" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/test/scala" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/src/main/scripts" type="java-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/test/resources" type="java-test-resource" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
......@@ -45,11 +46,7 @@
<orderEntry type="library" name="Maven: com.github.julien-truffaut:monocle-core_2.11:0.5.0" level="project" />
<orderEntry type="library" name="Maven: org.biojava:biojava3-core:3.1.0" level="project" />
<orderEntry type="library" name="Maven: org.biojava:biojava3-sequencing:3.1.0" level="project" />
<orderEntry type="library" name="Maven: com.google.guava:guava:17.0" level="project" />
<orderEntry type="library" name="Maven: com.baqend:bloom-filter:1.02" level="project" />
<orderEntry type="library" name="Maven: redis.clients:jedis:2.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-pool2:2.2" level="project" />
<orderEntry type="library" name="Maven: com.google.code.gson:gson:2.2.4" level="project" />
<orderEntry type="library" name="Maven: com.google.guava:guava:18.0" level="project" />
<orderEntry type="library" name="Maven: com.github.scopt:scopt_2.10:3.2.0" level="project" />
</component>
</module>
......
......@@ -24,10 +24,6 @@
<name>BioJava repository</name>
<url>http://www.biojava.org/download/maven/</url>
</repository>
<repository>
<id>orestes-bloom-filter</id>
<url>https://raw.githubusercontent.com/Baqend/Orestes-Bloomfilter/master/maven-repo</url>
</repository>
</repositories>
<dependencies>
<dependency>
......@@ -67,9 +63,9 @@
<version>3.1.0</version>
</dependency>
<dependency>
<groupId>com.baqend</groupId>
<artifactId>bloom-filter</artifactId>
<version>1.02</version>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>18.0</version>
</dependency>
<dependency>
<groupId>com.github.scopt</groupId>
......
......@@ -8,6 +8,7 @@ import java.io.File
import scala.collection.JavaConverters._
import com.google.common.hash.{Funnel, BloomFilter, PrimitiveSink}
import htsjdk.samtools.AlignmentBlock
import htsjdk.samtools.SAMFileReader
import htsjdk.samtools.SAMFileReader.QueryInterval
......@@ -18,8 +19,6 @@ import htsjdk.tribble.Feature
import htsjdk.tribble.BasicFeature
import htsjdk.tribble.bed.BEDCodec
import htsjdk.tribble.index.interval.{ Interval, IntervalTree }
import orestes.bloomfilter.HashProvider.HashMethod
import orestes.bloomfilter.{ BloomFilter, FilterBuilder }
import org.apache.commons.io.FilenameUtils.getExtension
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
......@@ -218,8 +217,32 @@ object WipeReads extends ToolCommand {
return true
}
false
} else
true
} else true
/**
 * Creates a mock mate for a paired read ~ a hack that avoids querying the BAM file
 * again for the real mate.
 *
 * The mock is a clone of `rec`; the only field set explicitly here is the alignment
 * start, which is replaced by the mate alignment start recorded on `rec`.
 *
 * @param rec read whose paired flag is set
 * @return the cloned record positioned at the mate alignment start of `rec`
 * @throws IllegalArgumentException if `rec` is not flagged as paired
 */
def makeMockPair(rec: SAMRecord): SAMRecord = {
  require(rec.getReadPairedFlag)
  val mockMate = rec.clone().asInstanceOf[SAMRecord]
  mockMate.setAlignmentStart(rec.getMateAlignmentStart)
  mockMate
}
/**
 * Builds the set element that represents a SAMRecord.
 *
 * When `filterOutMulti` is set the element is the read name alone; otherwise it is the
 * read name and the alignment start joined by an underscore.
 *
 * @param rec record to derive the element from
 * @return the string element for `rec`
 */
def elemFromSam(rec: SAMRecord): String = {
  val readName = rec.getReadName
  if (filterOutMulti) readName
  else readName + "_" + rec.getAlignmentStart.toString
}
/**
 * Funnel used by the BloomFilter to turn a SAMRecord into hashable data.
 *
 * The record is reduced to its set element via `elemFromSam` and the characters of
 * that element are written to the sink.
 */
object SAMFunnel extends Funnel[SAMRecord] {
  /**
   * Feeds the set element of `rec` into the given sink.
   *
   * @param rec record being hashed
   * @param into sink that receives the characters of the element
   */
  override def funnel(rec: SAMRecord, into: PrimitiveSink): Unit = {
    val elem = elemFromSam(rec)
    // The filter invokes this funnel for membership lookups as well as insertions,
    // so the message must not claim that the element is being added to the set.
    logger.debug("Funneling " + elem + " into bloom filter hash ...")
    into.putUnencodedChars(elem)
  }
}
/** filter function for read IDs */
val rgFilter =
......@@ -228,15 +251,6 @@ object WipeReads extends ToolCommand {
else
(r: SAMRecord) => readGroupIds.contains(r.getReadGroup.getReadGroupId)
/**
 * Function producing the set element of a record: the read name alone when
 * `filterOutMulti` is set, otherwise the read name and alignment start joined by an
 * underscore.
 */
val SamRecordElement: SAMRecord => String =
  if (filterOutMulti) (rec: SAMRecord) => rec.getReadName
  else (rec: SAMRecord) => rec.getReadName + "_" + rec.getAlignmentStart.toString
/**
 * Function producing the set element of a record's mate: the read name and the mate
 * alignment start joined by an underscore.
 */
val SamRecordMateElement: SAMRecord => String =
  (rec: SAMRecord) => rec.getReadName + "_" + rec.getMateAlignmentStart.toString
val readyBam = prepIndexedInputBam()
......@@ -257,7 +271,7 @@ object WipeReads extends ToolCommand {
.groupBy(x => x.getChr)
.map({ case (key, value) => (key, makeIntervalTree(value)) })
lazy val filteredOutSet: BloomFilter[String] = readyBam
lazy val filteredOutSet: BloomFilter[SAMRecord] = readyBam
// query BAM file with intervals
.queryOverlapping(queryIntervals)
// for compatibility
......@@ -269,28 +283,21 @@ object WipeReads extends ToolCommand {
// filter on specific read group IDs
.filter(x => rgFilter(x))
// fold starting from empty set
.foldLeft(new FilterBuilder(bloomSize.toInt, bloomFp)
.hashFunction(HashMethod.Murmur3KirschMitzenmacher)
.buildBloomFilter(): BloomFilter[String]
.foldLeft(BloomFilter.create(SAMFunnel, bloomSize.toInt, bloomFp)
)((acc, rec) => {
logger.debug("Adding read " + rec.getReadName + " to set ...")
if ((!filterOutMulti) && rec.getReadPairedFlag) {
acc.add(SamRecordElement(rec))
acc.add(SamRecordMateElement(rec))
} else
acc.add(SamRecordElement(rec))
acc.put(rec)
if (rec.getReadPairedFlag) acc.put(makeMockPair(rec))
acc
})
if (filterOutMulti)
(rec: SAMRecord) => filteredOutSet.contains(rec.getReadName)
(rec: SAMRecord) => filteredOutSet.mightContain(rec)
else
(rec: SAMRecord) => {
if (rec.getReadPairedFlag)
filteredOutSet.contains(SamRecordElement(rec)) &&
filteredOutSet.contains(SamRecordMateElement(rec))
filteredOutSet.mightContain(rec) && filteredOutSet.mightContain(makeMockPair(rec))
else
filteredOutSet.contains(SamRecordElement(rec))
filteredOutSet.mightContain(rec)
}
}
......@@ -345,13 +352,6 @@ object WipeReads extends ToolCommand {
}
}
/**
 * Checks whether the bloom filter can fulfill the size and false positive guarantees.
 *
 * The array size is currently limited to the maximum integer value. If the optimal
 * array size equals or exceeds that limit, it is assumed to be the result of a
 * truncation and the parameters are rejected.
 *
 * @param bloomSize size argument forwarded to `FilterBuilder.optimalM`
 * @param bloomFp false positive argument forwarded to `FilterBuilder.optimalM`
 * @return true only if the optimal array size is strictly below `Int.MaxValue`
 */
def bloomParamsOk(bloomSize: Long, bloomFp: Double): Boolean =
  // Strict comparison: a value equal to Int.MaxValue is treated as truncated.
  FilterBuilder.optimalM(bloomSize, bloomFp) < Int.MaxValue
case class Args(inputBam: File = null,
targetRegions: File = null,
outputBam: File = null,
......@@ -425,12 +425,6 @@ object WipeReads extends ToolCommand {
|the given ones, they will also be removed.
""".stripMargin)
checkConfig { c =>
if (!bloomParamsOk(c.bloomSize, c.bloomFp))
failure("Bloom parameters combination exceed Int limitation")
else
success
}
}
def main(args: Array[String]): Unit = {
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment