Skip to content
Snippets Groups Projects
Commit 9a00253e authored by bow's avatar bow
Browse files

Add refFlat support

parent 94a2b3dd
No related branches found
No related tags found
No related merge requests found
......@@ -6,6 +6,7 @@ package nl.lumc.sasc.biopet.tools
import java.io.File
import scala.collection.JavaConverters._
import scala.io.Source
import scala.math.{ max, min }
import com.google.common.hash.{ Funnel, BloomFilter, PrimitiveSink }
......@@ -83,20 +84,42 @@ object WipeReads extends ToolCommand {
.asScala
.map(x => new Interval(x.getChr, x.getStart, x.getEnd))
/** Function to create iterator from refFlat file */
def makeIntervalFromRefFlat(inFile: File): Iterator[Interval] = ???
// convert coordinate to 1-based fully closed
// parse chrom, start blocks, end blocks, strands
/**
* Parses a refFlat file to yield Interval objects
*
* Format description:
* http://genome.csdb.cn/cgi-bin/hgTables?hgsid=6&hgta_doSchemaDb=hg18&hgta_doSchemaTable=refFlat
*
* @param inFile input refFlat file
*/
def makeIntervalFromRefFlat(inFile: File): Iterator[Interval] =
Source.fromFile(inFile)
// read each line
.getLines()
// skip all empty lines
.filterNot(x => x.trim.isEmpty)
// split per column
.map(line => line.trim.split("\t"))
// take chromosome and exonEnds and exonStars
.map(x => (x(2), x.reverse.take(2)))
// split starts and ends based on comma
.map(x => (x._1, x._2.map(y => y.split(","))))
// zip exonStarts and exonEnds, note the index was reversed because we did .reverse above
.map(x => (x._1, x._2(1).zip(x._2(0))))
// make Intervals
.map(x => x._2.map(y => new Interval(x._1, y._1.toInt, y._2.toInt)))
// flatten sublist
.flatten
/** Function to create iterator from GTF file */
def makeIntervalFromGtf(inFile: File): Iterator[Interval] = ???
// convert coordinate to 1-based fully closed
// parse chrom, start blocks, end blocks, strands
// detect interval file format from extension
val iterFunc: (File => Iterator[Interval]) =
if (getExtension(inFile.toString.toLowerCase) == "bed")
makeIntervalFromBed
else if (getExtension(inFile.toString.toLowerCase) == "refflat")
makeIntervalFromRefFlat
else
throw new IllegalArgumentException("Unexpected interval file type: " + inFile.getPath)
......@@ -319,7 +342,7 @@ object WipeReads extends ToolCommand {
x => if (x.exists) success else failure("Input BAM file not found")
} text "Input BAM file"
opt[File]('r', "interval_file") required () valueName "<bed>" action { (x, c) =>
opt[File]('r', "interval_file") required () valueName "<bed/refflat>" action { (x, c) =>
c.copy(targetRegions = x)
} validate {
x => if (x.exists) success else failure("Target regions file not found")
......
gene1 trans1 chrS + 100 500 100 500 1 100, 500,
gene1 trans1 chrR + 100 500 200 400 1 100, 500,
gene1 trans1 chrQ + 100 1000 200 900 3 100,300,800 200,400,1000
......@@ -126,18 +126,33 @@ class WipeReadsUnitTest extends TestNGSuite with MockitoSugar with Matchers {
)
val pBamFile3 = new File(resourcePath("/paired03.bam"))
val BedFile1 = new File(resourcePath("/rrna01.bed"))
val minArgList = List("-I", sBamFile1.toString, "-l", BedFile1.toString, "-o", "mock.bam")
val RefFlatFile1 = new File(resourcePath("/rrna01.refFlat"))
@Test def testMakeFeatureFromBed() = {
@Test def testMakeIntervalFromBed() = {
val intervals: List[Interval] = makeIntervalFromFile(BedFile1)
intervals.length should be(3)
intervals.length shouldBe 3
intervals.head.getSequence should ===("chrQ")
intervals.head.getStart should be(991)
intervals.head.getEnd should be(1000)
intervals.head.getStart shouldBe 991
intervals.head.getEnd shouldBe 1000
intervals.last.getSequence should ===("chrQ")
intervals.last.getStart shouldBe 291
intervals.last.getEnd shouldBe 320
}
@Test def testMakeIntervalFromRefFlat() = {
val intervals: List[Interval] = makeIntervalFromFile(RefFlatFile1)
intervals.length shouldBe 5
intervals.last.getSequence should ===("chrQ")
intervals.last.getStart should be(291)
intervals.last.getEnd should be(320)
intervals.last.getStart shouldBe 100
intervals.last.getEnd shouldBe 200
intervals(2).getSequence should ===("chrQ")
intervals(2).getStart shouldBe 800
intervals(2).getEnd shouldBe 1000
intervals.head.getSequence should ===("chrS")
intervals.head.getStart shouldBe 100
intervals.head.getEnd shouldBe 500
}
@Test def testSingleBamDefault() = {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment