Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
e6199e01
Commit
e6199e01
authored
Oct 24, 2016
by
Sander Bollen
Browse files
Merge branch 'feature-sample_regex' into 'develop'
Added regex option See merge request !458
parents
4db54050
d1e7b1f9
Changes
1
Hide whitespace changes
Inline
Side-by-side
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FindOverlapMatch.scala
View file @
e6199e01
...
...
@@ -6,6 +6,7 @@ import nl.lumc.sasc.biopet.utils.ToolCommand
import
scala.collection.mutable.ListBuffer
import
scala.io.Source
import
scala.util.matching.Regex
/**
* This tool will find all pairs above a cutoff in a data table
...
...
@@ -17,7 +18,9 @@ object FindOverlapMatch extends ToolCommand {
case
class
Args
(
inputMetrics
:
File
=
null
,
outputFile
:
Option
[
File
]
=
None
,
cutoff
:
Double
=
0.0
,
filterSameNames
:
Boolean
=
true
)
extends
AbstractArgs
filterSameNames
:
Boolean
=
true
,
rowSampleRegex
:
Option
[
Regex
]
=
None
,
columnSampleRegex
:
Option
[
Regex
]
=
None
)
extends
AbstractArgs
class
OptParser
extends
AbstractOptParser
{
opt
[
File
](
'i'
,
"input"
)
required
()
unbounded
()
valueName
"<file>"
action
{
(
x
,
c
)
=>
...
...
@@ -32,7 +35,12 @@ object FindOverlapMatch extends ToolCommand {
opt
[
Unit
](
"use_same_names"
)
unbounded
()
valueName
"<value>"
action
{
(
x
,
c
)
=>
c
.
copy
(
filterSameNames
=
false
)
}
text
"Do not compare samples with the same name"
opt
[
String
](
"rowSampleRegex"
)
unbounded
()
valueName
"<regex>"
action
{
(
x
,
c
)
=>
c
.
copy
(
rowSampleRegex
=
Some
(
x
.
r
))
}
text
"Samples in the row should match this regex"
opt
[
String
](
"columnSampleRegex"
)
unbounded
()
valueName
"<regex>"
action
{
(
x
,
c
)
=>
c
.
copy
(
columnSampleRegex
=
Some
(
x
.
r
))
}
text
"Samples in the column should match this regex"
}
/**
...
...
@@ -58,19 +66,19 @@ object FindOverlapMatch extends ToolCommand {
case
_
=>
sys
.
process
.
stdout
}
for
(
i1
<-
samplesColumnHeader
)
{
for
(
columnSample
<-
samplesColumnHeader
if
cmdArgs
.
columnSampleRegex
.
map
(
_
.
findFirstIn
(
columnSample
.
_1
).
isDefined
).
getOrElse
(
true
)
)
{
val
buffer
=
ListBuffer
[(
String
,
Double
)]()
for
(
i2
<-
samplesRowHeader
)
{
val
value
=
data
(
i1
.
_2
)(
i2
.
_2
).
toDouble
if
(
value
>=
cmdArgs
.
cutoff
&&
(!
cmdArgs
.
filterSameNames
||
i1
.
_2
!=
i2
.
_2
))
{
buffer
.+=((
i2
.
_1
,
value
))
for
(
rowSample
<-
samplesRowHeader
if
cmdArgs
.
rowSampleRegex
.
map
(
_
.
findFirstIn
(
rowSample
.
_1
).
isDefined
).
getOrElse
(
true
)
)
{
val
value
=
data
(
columnSample
.
_2
)(
rowSample
.
_2
).
toDouble
if
(
value
>=
cmdArgs
.
cutoff
&&
(!
cmdArgs
.
filterSameNames
||
columnSample
.
_2
!=
rowSample
.
_2
))
{
buffer
.+=((
rowSample
.
_1
,
value
))
}
}
if
(
buffer
.
nonEmpty
)
overlap
+=
1
else
noOverlap
+=
1
if
(
buffer
.
size
>
1
)
multiOverlap
+=
1
writer
.
println
(
s
"${
i1
._1}\t${buffer.mkString("
\
t
")}"
)
writer
.
println
(
s
"${
columnSample
._1}\t${buffer.mkString("
\
t
")}"
)
}
logger
.
info
(
s
"$overlap found"
)
logger
.
info
(
s
"no $noOverlap found"
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment