Commit e6199e01 authored by Sander Bollen
Browse files

Merge branch 'feature-sample_regex' into 'develop'

Added regex option



See merge request !458
parents 4db54050 d1e7b1f9
......@@ -6,6 +6,7 @@ import nl.lumc.sasc.biopet.utils.ToolCommand
import scala.collection.mutable.ListBuffer
import scala.io.Source
import scala.util.matching.Regex
/**
* This tool will find all pairs above a cutoff in a data table
......@@ -17,7 +18,9 @@ object FindOverlapMatch extends ToolCommand {
case class Args(inputMetrics: File = null,
outputFile: Option[File] = None,
cutoff: Double = 0.0,
filterSameNames: Boolean = true) extends AbstractArgs
filterSameNames: Boolean = true,
rowSampleRegex: Option[Regex] = None,
columnSampleRegex: Option[Regex] = None) extends AbstractArgs
class OptParser extends AbstractOptParser {
opt[File]('i', "input") required () unbounded () valueName "<file>" action { (x, c) =>
......@@ -32,7 +35,12 @@ object FindOverlapMatch extends ToolCommand {
opt[Unit]("use_same_names") unbounded () valueName "<value>" action { (x, c) =>
c.copy(filterSameNames = false)
} text "Do not compare samples with the same name"
opt[String]("rowSampleRegex") unbounded () valueName "<regex>" action { (x, c) =>
c.copy(rowSampleRegex = Some(x.r))
} text "Samples in the row should match this regex"
opt[String]("columnSampleRegex") unbounded () valueName "<regex>" action { (x, c) =>
c.copy(columnSampleRegex = Some(x.r))
} text "Samples in the column should match this regex"
}
/**
......@@ -58,19 +66,19 @@ object FindOverlapMatch extends ToolCommand {
case _ => sys.process.stdout
}
for (i1 <- samplesColumnHeader) {
for (columnSample <- samplesColumnHeader if cmdArgs.columnSampleRegex.map(_.findFirstIn(columnSample._1).isDefined).getOrElse(true)) {
val buffer = ListBuffer[(String, Double)]()
for (i2 <- samplesRowHeader) {
val value = data(i1._2)(i2._2).toDouble
if (value >= cmdArgs.cutoff && (!cmdArgs.filterSameNames || i1._2 != i2._2)) {
buffer.+=((i2._1, value))
for (rowSample <- samplesRowHeader if cmdArgs.rowSampleRegex.map(_.findFirstIn(rowSample._1).isDefined).getOrElse(true)) {
val value = data(columnSample._2)(rowSample._2).toDouble
if (value >= cmdArgs.cutoff && (!cmdArgs.filterSameNames || columnSample._2 != rowSample._2)) {
buffer.+=((rowSample._1, value))
}
}
if (buffer.nonEmpty) overlap += 1
else noOverlap += 1
if (buffer.size > 1) multiOverlap += 1
writer.println(s"${i1._1}\t${buffer.mkString("\t")}")
writer.println(s"${columnSample._1}\t${buffer.mkString("\t")}")
}
logger.info(s"$overlap found")
logger.info(s"no $noOverlap found")
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment