Commit e6199e01 authored by Sander Bollen
Browse files

Merge branch 'feature-sample_regex' into 'develop'

Added regex option



See merge request !458
parents 4db54050 d1e7b1f9
...@@ -6,6 +6,7 @@ import nl.lumc.sasc.biopet.utils.ToolCommand ...@@ -6,6 +6,7 @@ import nl.lumc.sasc.biopet.utils.ToolCommand
import scala.collection.mutable.ListBuffer import scala.collection.mutable.ListBuffer
import scala.io.Source import scala.io.Source
import scala.util.matching.Regex
/** /**
* This tool will find all pairs above a cutoff in a data table * This tool will find all pairs above a cutoff in a data table
...@@ -17,7 +18,9 @@ object FindOverlapMatch extends ToolCommand { ...@@ -17,7 +18,9 @@ object FindOverlapMatch extends ToolCommand {
case class Args(inputMetrics: File = null, case class Args(inputMetrics: File = null,
outputFile: Option[File] = None, outputFile: Option[File] = None,
cutoff: Double = 0.0, cutoff: Double = 0.0,
filterSameNames: Boolean = true) extends AbstractArgs filterSameNames: Boolean = true,
rowSampleRegex: Option[Regex] = None,
columnSampleRegex: Option[Regex] = None) extends AbstractArgs
class OptParser extends AbstractOptParser { class OptParser extends AbstractOptParser {
opt[File]('i', "input") required () unbounded () valueName "<file>" action { (x, c) => opt[File]('i', "input") required () unbounded () valueName "<file>" action { (x, c) =>
...@@ -32,7 +35,12 @@ object FindOverlapMatch extends ToolCommand { ...@@ -32,7 +35,12 @@ object FindOverlapMatch extends ToolCommand {
opt[Unit]("use_same_names") unbounded () valueName "<value>" action { (x, c) => opt[Unit]("use_same_names") unbounded () valueName "<value>" action { (x, c) =>
c.copy(filterSameNames = false) c.copy(filterSameNames = false)
} text "Do not compare samples with the same name" } text "Do not compare samples with the same name"
opt[String]("rowSampleRegex") unbounded () valueName "<regex>" action { (x, c) =>
c.copy(rowSampleRegex = Some(x.r))
} text "Samples in the row should match this regex"
opt[String]("columnSampleRegex") unbounded () valueName "<regex>" action { (x, c) =>
c.copy(columnSampleRegex = Some(x.r))
} text "Samples in the column should match this regex"
} }
/** /**
...@@ -58,19 +66,19 @@ object FindOverlapMatch extends ToolCommand { ...@@ -58,19 +66,19 @@ object FindOverlapMatch extends ToolCommand {
case _ => sys.process.stdout case _ => sys.process.stdout
} }
for (i1 <- samplesColumnHeader) { for (columnSample <- samplesColumnHeader if cmdArgs.columnSampleRegex.map(_.findFirstIn(columnSample._1).isDefined).getOrElse(true)) {
val buffer = ListBuffer[(String, Double)]() val buffer = ListBuffer[(String, Double)]()
for (i2 <- samplesRowHeader) { for (rowSample <- samplesRowHeader if cmdArgs.rowSampleRegex.map(_.findFirstIn(rowSample._1).isDefined).getOrElse(true)) {
val value = data(i1._2)(i2._2).toDouble val value = data(columnSample._2)(rowSample._2).toDouble
if (value >= cmdArgs.cutoff && (!cmdArgs.filterSameNames || i1._2 != i2._2)) { if (value >= cmdArgs.cutoff && (!cmdArgs.filterSameNames || columnSample._2 != rowSample._2)) {
buffer.+=((i2._1, value)) buffer.+=((rowSample._1, value))
} }
} }
if (buffer.nonEmpty) overlap += 1 if (buffer.nonEmpty) overlap += 1
else noOverlap += 1 else noOverlap += 1
if (buffer.size > 1) multiOverlap += 1 if (buffer.size > 1) multiOverlap += 1
writer.println(s"${i1._1}\t${buffer.mkString("\t")}") writer.println(s"${columnSample._1}\t${buffer.mkString("\t")}")
} }
logger.info(s"$overlap found") logger.info(s"$overlap found")
logger.info(s"no $noOverlap found") logger.info(s"no $noOverlap found")
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment