Commit 8a710c02 authored by bow's avatar bow

Merge branch 'feature-find_overlap' into 'develop'

Feature find overlap

This is a sample tool to find overlapping samples based on a matrix produced by vcfstats

See merge request !454
parents 6efe0956 2d9becc9
......@@ -32,6 +32,7 @@ object BiopetToolsExecutable extends BiopetExecutable {
nl.lumc.sasc.biopet.tools.FastqSplitter,
nl.lumc.sasc.biopet.tools.FastqSync,
nl.lumc.sasc.biopet.tools.FindRepeatsPacBio,
nl.lumc.sasc.biopet.tools.FindOverlapMatch,
nl.lumc.sasc.biopet.tools.GvcfToBed,
nl.lumc.sasc.biopet.tools.MergeAlleles,
nl.lumc.sasc.biopet.tools.MergeTables,
......
package nl.lumc.sasc.biopet.tools
import java.io.{ File, PrintStream }
import nl.lumc.sasc.biopet.utils.ToolCommand
import scala.collection.mutable.ListBuffer
import scala.io.Source
/**
 * This tool will find all pairs above a cutoff in a data table
 *
 * The input is a tab-separated table whose first row holds the column sample
 * names and whose first column holds the row sample names. For every column
 * sample one output line is written: the column sample name followed by a tab
 * and the tab-separated `(rowSample,value)` pairs whose value is at least the
 * cutoff.
 *
 * Created by pjvan_thof on 21-9-16.
 */
object FindOverlapMatch extends ToolCommand {

  /**
   * Command line arguments of this tool.
   *
   * @param inputMetrics    tab-separated table to read
   * @param outputFile      file to write the pairs to; stdout when not set
   * @param cutoff          minimum value for a pair to be reported
   * @param filterSameNames when true, pairs where the row and column sample
   *                        have the same name are not reported
   */
  case class Args(inputMetrics: File = null,
                  outputFile: Option[File] = None,
                  cutoff: Double = 0.0,
                  filterSameNames: Boolean = true) extends AbstractArgs

  class OptParser extends AbstractOptParser {
    opt[File]('i', "input") required () unbounded () valueName "<file>" action { (x, c) =>
      c.copy(inputMetrics = x)
    } text "Input should be a table where the first row and column have the ID's, those can be different"
    opt[File]('o', "output") unbounded () valueName "<file>" action { (x, c) =>
      c.copy(outputFile = Some(x))
    } text "default to stdout"
    opt[Double]('c', "cutoff") required () unbounded () valueName "<value>" action { (x, c) =>
      c.copy(cutoff = x)
    } text "minimum value to report it as pair"
    // This flag switches the same-name filter OFF; the help text now says so.
    opt[Unit]("use_same_names") unbounded () valueName "<value>" action { (_, c) =>
      c.copy(filterSameNames = false)
    } text "Also report pairs of samples with the same name, by default these are skipped"
  }

  /**
   * Reads the table, writes the pairs at or above the cutoff and logs how many
   * column samples had at least one, none, or more than one match.
   *
   * @param args the command line arguments
   * @throws IllegalArgumentException when the arguments can not be parsed or
   *                                  the input table is empty
   */
  def main(args: Array[String]): Unit = {
    val argsParser = new OptParser
    val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)

    // Read the whole table up front and always release the file handle.
    val reader = Source.fromFile(cmdArgs.inputMetrics)
    val data = try reader.getLines().map(_.split("\t")).toArray finally reader.close()
    require(data.nonEmpty, s"Input table is empty: ${cmdArgs.inputMetrics}")

    // (name, index) pairs; index 0 is the header cell itself, so it is dropped.
    val samplesColumnHeader = data.head.zipWithIndex.tail
    val samplesRowHeader = data.map(_.head).zipWithIndex.tail

    val matches = samplesColumnHeader.map {
      case (columnName, columnIdx) =>
        val pairs = for {
          (rowName, rowIdx) <- samplesRowHeader
          // Rows are the outer dimension of `data`: index by row first, then column.
          value = data(rowIdx)(columnIdx).toDouble
          // Compare names, not positions: row and column IDs may differ.
          if value >= cmdArgs.cutoff && (!cmdArgs.filterSameNames || columnName != rowName)
        } yield (rowName, value)
        (columnName, pairs)
    }

    val writer = cmdArgs.outputFile match {
      case Some(file) => new PrintStream(file)
      case _          => sys.process.stdout
    }
    try {
      matches.foreach {
        case (columnName, pairs) => writer.println(s"$columnName\t${pairs.mkString("\t")}")
      }
    } finally {
      // Only close streams this tool opened; stdout belongs to the JVM.
      if (cmdArgs.outputFile.isDefined) writer.close() else writer.flush()
    }

    val overlap = matches.count { case (_, pairs) => pairs.nonEmpty }
    val noOverlap = matches.length - overlap
    val multiOverlap = matches.count { case (_, pairs) => pairs.length > 1 }

    logger.info(s"$overlap found")
    logger.info(s"no $noOverlap found")
    logger.info(s"multi $multiOverlap found")
  }
}
sample1 (sample3,0.9)
sample2
sample3 (sample1,0.9)
sample1 (sample1,1.0) (sample3,0.9)
sample2 (sample2,1.0)
sample3 (sample1,0.9) (sample3,1.0)
sample1 sample2 sample3
sample1 1.0 0.5 0.9
sample2 0.5 1.0 0.5
sample3 0.9 0.5 1.0
package nl.lumc.sasc.biopet.tools
import java.io.File
import java.nio.file.Paths
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
import scala.io.Source
/**
 * Runs [[FindOverlapMatch]] on the `overlapmetrics.txt` resource with a cutoff
 * of 0.9 and compares the written file line by line with an expected output
 * resource.
 *
 * Created by pjvan_thof on 27-9-16.
 */
class FindOverlapMatchTest extends TestNGSuite with Matchers {
  private def resourcePath(p: String): String = {
    Paths.get(getClass.getResource(p).toURI).toString
  }

  /** Reads all lines of a file and closes the underlying handle afterwards. */
  private def readLines(file: File): List[String] = {
    val source = Source.fromFile(file)
    try source.getLines().toList finally source.close()
  }

  /**
   * Runs the tool into a temporary file and checks the result.
   *
   * @param expectedResource classpath resource holding the expected output
   * @param extraArgs        arguments appended after the input, cutoff and output options
   */
  private def checkOutput(expectedResource: String, extraArgs: String*): Unit = {
    val input = new File(resourcePath("/overlapmetrics.txt"))
    val output = File.createTempFile("overlap.", ".txt")
    val shouldBeOutput = new File(resourcePath(expectedResource))
    output.deleteOnExit()

    val baseArgs = Array("-i", input.getAbsolutePath, "-c", "0.9", "-o", output.getAbsolutePath)
    FindOverlapMatch.main(baseArgs ++ extraArgs)

    readLines(output) shouldBe readLines(shouldBeOutput)
  }

  @Test
  def testOverlap(): Unit = checkOutput("/overlapmetrics.default.output")

  @Test
  def testOverlapSameName(): Unit = checkOutput("/overlapmetrics.same_names.output", "--use_same_names")
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment