Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
155cf263
Commit
155cf263
authored
Jul 10, 2017
by
pjvan_thof
Browse files
Merge remote-tracking branch 'remotes/origin/develop' into fix-BIOPET-492
parents
e79bf279
4626aad3
Changes
31
Hide whitespace changes
Inline
Side-by-side
biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetPipe.scala
View file @
155cf263
...
...
@@ -90,4 +90,4 @@ class BiopetPipe(val commands: List[BiopetCommandLineFunction]) extends BiopetCo
super
.
freezeFieldValues
()
commands
.
foreach
(
_
.
qSettings
=
qSettings
)
}
}
}
\ No newline at end of file
biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala
View file @
155cf263
...
...
@@ -124,11 +124,11 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
lazy
val
father
=
{
val
g
:
Option
[
String
]
=
sampleTags
.
get
(
"father"
).
map
(
_
.
toString
)
g
.
foreach
{
father
=>
if
(
sampleId
!
=
father
)
Logging
.
addError
(
s
"Father for $sampleId can not be itself"
)
if
(
samples
.
contains
(
father
))
if
(
samples
(
father
).
gender
=
=
Gender
.
Male
)
Logging
.
addError
(
s
"Father of $sampleId is not a
fe
male"
)
else
logger
.
warn
(
s
"For sample '$sampleId' is father '$father' not found in config"
)
if
(
sampleId
=
=
father
)
Logging
.
addError
(
s
"Father for $sampleId can not be itself"
)
if
(
samples
.
contains
(
father
))
{
if
(
samples
(
father
).
gender
!
=
Gender
.
Male
)
Logging
.
addError
(
s
"Father of $sampleId is not a male"
)
}
else
logger
.
warn
(
s
"For sample '$sampleId'
,
i
t
s father '$father' not found in config"
)
}
g
}
...
...
@@ -136,15 +136,19 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
lazy
val
mother
=
{
val
g
:
Option
[
String
]
=
sampleTags
.
get
(
"mother"
).
map
(
_
.
toString
)
g
.
foreach
{
mother
=>
if
(
sampleId
!
=
mother
)
Logging
.
addError
(
s
"mother for $sampleId can not be itself"
)
if
(
samples
.
contains
(
mother
))
if
(
samples
(
mother
).
gender
=
=
Gender
.
Female
)
if
(
sampleId
=
=
mother
)
Logging
.
addError
(
s
"mother for $sampleId can not be itself"
)
if
(
samples
.
contains
(
mother
))
{
if
(
samples
(
mother
).
gender
!
=
Gender
.
Female
)
Logging
.
addError
(
s
"Mother of $sampleId is not a female"
)
else
logger
.
warn
(
s
"For sample '$sampleId' is mother '$mother' not found in config"
)
}
else
logger
.
warn
(
s
"For sample '$sampleId'
,
i
t
s mother '$mother' not found in config"
)
}
g
}
lazy
val
family
:
Option
[
String
]
=
{
sampleTags
.
get
(
"family"
).
map
(
_
.
toString
)
}
lazy
val
sampleGroups
:
List
[
String
]
=
sampleTags
.
get
(
"groups"
)
match
{
case
Some
(
g
:
List
[
_
])
=>
g
.
map
(
_
.
toString
)
case
Some
(
g
:
String
)
=>
List
(
g
)
...
...
@@ -312,4 +316,4 @@ object MultiSampleQScript {
val
Male
,
Female
,
Unknown
=
Value
}
}
}
\ No newline at end of file
biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/PedigreeQscript.scala
0 → 100644
View file @
155cf263
package
nl.lumc.sasc.biopet.core
import
java.io.PrintWriter
import
nl.lumc.sasc.biopet.core.PedigreeQscript.PedMergeStrategy
import
nl.lumc.sasc.biopet.utils.Logging
import
org.broadinstitute.gatk.queue.QScript
import
scala.io.Source
/**
* Created by Sander Bollen on 28-3-17.
*
* A multi-sample Qscript with additional Pedigree information.
* Pedigrees follow the PED standard.
* See: http://zzz.bwh.harvard.edu/plink/data.shtml#ped
*
* Pedigrees may be parsed from the sample config and/or a supplied PED file.
*/
trait
PedigreeQscript
extends
MultiSampleQScript
{
qscript
:
QScript
=>
/* Optionally parse from ped file */
def
ped
:
Option
[
File
]
=
config
(
"ped_file"
,
default
=
None
)
/* The merge stategy to use when we have both a ped file and sample tag information */
def
mergeStrategy
:
PedMergeStrategy.Value
=
PedMergeStrategy
.
Concatenate
/**
* Case class representing a PED samples
* For the PED format, see:
* http://zzz.bwh.harvard.edu/plink/data.shtml#ped
* @param familyId family id
* @param individualId individual id
* @param paternalId Optional paternal id
* @param maternalId Optional maternal id
* @param gender gender
* @param affectedPhenotype Optional boolean
* @param genotypeFields optional genotype fields
*/
case
class
PedSample
(
familyId
:
String
,
individualId
:
String
,
paternalId
:
Option
[
String
],
maternalId
:
Option
[
String
],
gender
:
MultiSampleQScript.Gender.Value
,
affectedPhenotype
:
Option
[
Boolean
],
genotypeFields
:
List
[
String
]
=
Nil
)
lazy
val
pedSamples
:
List
[
PedSample
]
=
{
mergeStrategy
match
{
case
PedMergeStrategy
.
Concatenate
=>
pedSamplesFromConfig
()
:::
parsePedFile
()
case
_
=>
throw
new
NotImplementedError
()
// todo
}
}
/**
* Get affected samples
*
* @return List[PedSample]
*/
def
getIndexSamples
:
List
[
PedSample
]
=
{
pedSamples
.
filter
(
x
=>
x
.
affectedPhenotype
.
contains
(
true
))
}
/**
* Get all samples for which mother and father are defined
* @return
*/
def
getChildren
:
List
[
PedSample
]
=
{
pedSamples
.
filter
(
x
=>
x
.
maternalId
.
isDefined
&&
x
.
paternalId
.
isDefined
)
}
/**
* Get pedSamples from sample tags in config
* May return empty list if no pedigree can be constructed
* PedSamples can only be constructed for those samples where family is defined
* Furthermore, if a father or mother is given, another sample with this id must also exist
*
* @return
*/
def
pedSamplesFromConfig
()
:
List
[
PedSample
]
=
{
val
withFam
=
samples
.
values
.
filter
(
_
.
family
.
isDefined
)
val
sampleIds
=
withFam
.
map
(
_
.
sampleId
).
toSet
val
fathers
=
withFam
.
flatMap
(
_
.
father
)
val
mothers
=
withFam
.
flatMap
(
_
.
mother
)
fathers
.
foreach
{
f
=>
if
(!
sampleIds
.
contains
(
f
))
{
Logging
.
addError
(
s
"Father $f does not exist in samples"
)
}
}
mothers
.
foreach
{
m
=>
if
(!
sampleIds
.
contains
(
m
))
{
Logging
.
addError
(
s
"Mother $m does not exist in samples"
)
}
}
withFam
.
map
{
s
=>
PedSample
(
s
.
family
.
get
,
s
.
sampleId
,
s
.
father
,
s
.
mother
,
s
.
gender
,
None
)
}.
toList
}
/* Parse ped file to list of PedSamples */
def
parsePedFile
()
:
List
[
PedSample
]
=
{
ped
match
{
case
Some
(
p
)
=>
Source
.
fromFile
(
p
)
.
getLines
()
.
map
{
x
=>
parseSinglePedLine
(
x
)
}
.
toList
case
_
=>
Nil
}
}
/* Parse a single Ped line to a PedSample */
def
parseSinglePedLine
(
line
:
String
)
:
PedSample
=
{
val
arr
=
line
.
split
(
"\\s"
)
var
genotypeFields
:
List
[
String
]
=
Nil
if
(
arr
.
size
<
6
)
{
throw
new
IllegalArgumentException
(
"Ped file contains less than 6 columns"
)
}
else
if
(
arr
.
length
==
6
)
{
genotypeFields
=
Nil
}
else
{
genotypeFields
=
arr
.
drop
(
6
).
toList
}
val
gender
=
arr
(
4
)
match
{
case
"1"
=>
MultiSampleQScript
.
Gender
.
Male
case
"2"
=>
MultiSampleQScript
.
Gender
.
Female
case
_
=>
MultiSampleQScript
.
Gender
.
Unknown
}
val
affected
=
arr
(
5
)
match
{
case
"1"
=>
Some
(
false
)
case
"2"
=>
Some
(
true
)
case
_
=>
None
}
val
paternalId
=
arr
(
2
)
match
{
case
"0"
=>
None
case
otherwise
=>
Some
(
otherwise
)
}
val
maternalId
=
arr
(
3
)
match
{
case
"0"
=>
None
case
otherwise
=>
Some
(
otherwise
)
}
PedSample
(
arr
(
0
),
arr
(
1
),
paternalId
,
maternalId
,
gender
,
affected
,
genotypeFields
)
}
/* Check whether sample is a mother */
def
isMother
(
pedSample
:
PedSample
)
:
Boolean
=
{
val
motherIds
=
pedSamples
.
flatMap
(
_
.
maternalId
)
motherIds
.
contains
(
pedSample
.
individualId
)
}
/* Check whether sample is a father */
def
isFather
(
pedSample
:
PedSample
)
:
Boolean
=
{
val
fatherIds
=
pedSamples
.
flatMap
(
_
.
paternalId
)
fatherIds
.
contains
(
pedSample
.
individualId
)
}
/**
* Convenience method for checking whether current pedigree constitutes a single patient pedigree
*/
def
isSingle
:
Boolean
=
{
pedSamples
.
size
==
1
&&
pedSamples
.
head
.
maternalId
.
isEmpty
&&
pedSamples
.
head
.
paternalId
.
isEmpty
}
/**
* Convenience method for checking whether current pedigree constitutes a trio pedigree
*/
def
isTrio
:
Boolean
=
{
getIndexSamples
.
size
==
1
&&
pedSamples
.
size
==
3
&&
(
getIndexSamples
.
head
.
maternalId
.
isDefined
&&
pedSamples
.
map
(
_
.
individualId
)
.
contains
(
getIndexSamples
.
head
.
maternalId
.
get
))
&&
(
getIndexSamples
.
head
.
paternalId
.
isDefined
&&
pedSamples
.
map
(
_
.
individualId
)
.
contains
(
getIndexSamples
.
head
.
paternalId
.
get
))
}
/**
* Write list of ped samples to a PED file
*
* For the PED format see:
* http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml
*/
def
writeToPedFile
(
outFile
:
File
)
:
Unit
=
{
val
writer
=
new
PrintWriter
(
outFile
)
pedSamples
.
foreach
{
p
=>
val
paternalField
=
p
.
paternalId
match
{
case
Some
(
s
)
=>
s
case
_
=>
"0"
}
val
maternalField
=
p
.
maternalId
match
{
case
Some
(
s
)
=>
s
case
_
=>
"0"
}
val
genderField
=
p
.
gender
match
{
case
MultiSampleQScript
.
Gender
.
Male
=>
"1"
case
MultiSampleQScript
.
Gender
.
Female
=>
"2"
case
_
=>
"-9"
}
val
affectedField
=
p
.
affectedPhenotype
match
{
case
Some
(
b
)
=>
b
match
{
case
true
=>
"2"
case
_
=>
"1"
}
case
_
=>
"0"
}
val
mainLine
:
String
=
s
"${p.familyId}\t${p.individualId}\t$paternalField\t$maternalField\t$genderField\t$affectedField"
val
line
=
if
(
p
.
genotypeFields
.
nonEmpty
)
{
mainLine
+
"\t"
+
p
.
genotypeFields
.
mkString
(
"\t"
)
}
else
mainLine
writer
.
write
(
line
+
"\n"
)
}
writer
.
close
()
}
}
object
PedigreeQscript
{
object
PedMergeStrategy
extends
Enumeration
{
val
Concatenate
,
Update
,
ConcatenatedAndUpdate
=
Value
}
}
biopet-core/src/test/resources/full.ped
0 → 100644
View file @
155cf263
fam01 sample1 sample2 sample3 2 0
fam01 sample2 0 0 1 0
fam01 sample3 0 0 2 0
fam02 sample4 sample5 sample6 2 2
fam02 sample5 0 0 1 1
fam02 sample6 0 0 2 1
biopet-core/src/test/resources/trio.ped
0 → 100644
View file @
155cf263
fam02 sample4 sample5 sample6 2 2
fam02 sample5 0 0 1 1
fam02 sample6 0 0 2 1
biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/PedigreeQScriptTest.scala
0 → 100644
View file @
155cf263
package
nl.lumc.sasc.biopet.core
import
java.io.File
import
java.nio.file.Paths
import
nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender
import
nl.lumc.sasc.biopet.core.extensions.Md5sum
import
nl.lumc.sasc.biopet.utils.config.Config
import
nl.lumc.sasc.biopet.utils.
{
ConfigUtils
,
Logging
}
import
org.broadinstitute.gatk.queue.
{
QScript
,
QSettings
}
import
org.scalatest.Matchers
import
org.scalatest.testng.TestNGSuite
import
org.testng.annotations.Test
import
scala.language.reflectiveCalls
import
scala.collection.mutable.ListBuffer
import
scala.io.Source
/**
* Created by Sander Bollen on 11-4-17.
*/
class
PedigreeQScriptTest
extends
TestNGSuite
with
Matchers
{
import
PedigreeQScriptTest._
@Test
def
testConfigPedigree
()
:
Unit
=
{
val
script
=
PedigreeQScriptTest
(
sample1
::
sample2
::
sample3
::
Nil
)
script
.
init
()
script
.
biopetScript
()
script
.
pedSamples
.
size
shouldBe
3
script
.
pedSamples
.
map
(
_
.
individualId
).
contains
(
"sample1"
)
shouldBe
true
script
.
pedSamples
.
map
(
_
.
individualId
).
contains
(
"sample2"
)
shouldBe
true
script
.
pedSamples
.
map
(
_
.
individualId
).
contains
(
"sample3"
)
shouldBe
true
}
@Test
def
testGenderCorrect
()
:
Unit
=
{
val
script
=
PedigreeQScriptTest
(
sample2
::
Nil
)
script
.
init
()
script
.
biopetScript
()
script
.
pedSamples
.
size
shouldBe
1
script
.
pedSamples
.
head
.
gender
shouldEqual
Gender
.
Male
val
script2
=
PedigreeQScriptTest
(
sample3
::
Nil
)
script2
.
init
()
script2
.
biopetScript
()
script2
.
pedSamples
.
size
shouldBe
1
script2
.
pedSamples
.
head
.
gender
shouldEqual
Gender
.
Female
}
@Test
def
testIsSingle
()
:
Unit
=
{
val
script
=
PedigreeQScriptTest
(
sample2
::
Nil
)
script
.
init
()
script
.
biopetScript
()
script
.
isSingle
shouldBe
true
}
@Test
def
testPedParsing
()
:
Unit
=
{
val
script
=
PedigreeQScriptTest
(
trioPed
::
Nil
)
script
.
init
()
script
.
biopetScript
()
script
.
pedSamples
.
size
shouldBe
3
script
.
pedSamples
.
map
(
_
.
individualId
).
contains
(
"sample4"
)
shouldBe
true
script
.
pedSamples
.
map
(
_
.
individualId
).
contains
(
"sample5"
)
shouldBe
true
script
.
pedSamples
.
map
(
_
.
individualId
).
contains
(
"sample6"
)
shouldBe
true
}
@Test
def
testIsMother
()
=
{
val
script
=
PedigreeQScriptTest
(
trioPed
::
Nil
)
script
.
init
()
script
.
biopetScript
()
script
.
isMother
(
script
.
pedSamples
.
filter
(
_
.
individualId
==
"sample6"
).
head
)
shouldBe
true
}
@Test
def
testIsFather
()
=
{
val
script
=
PedigreeQScriptTest
(
trioPed
::
Nil
)
script
.
init
()
script
.
biopetScript
()
script
.
isFather
(
script
.
pedSamples
.
filter
(
_
.
individualId
==
"sample5"
).
head
)
shouldBe
true
}
@Test
def
testIsTrio
()
=
{
val
script
=
PedigreeQScriptTest
(
trioPed
::
Nil
)
script
.
init
()
script
.
biopetScript
()
script
.
isTrio
shouldBe
true
val
script2
=
PedigreeQScriptTest
(
sample1
::
sample2
::
sample3
::
Nil
)
script2
.
init
()
script2
.
biopetScript
()
script2
.
isTrio
shouldBe
false
}
@Test
def
testConcatenation
()
=
{
val
script
=
PedigreeQScriptTest
(
sample1
::
sample2
::
sample3
::
trioPed
::
Nil
)
script
.
init
()
script
.
biopetScript
()
script
.
pedSamples
.
size
shouldBe
6
script
.
pedSamples
.
map
(
_
.
individualId
).
contains
(
"sample1"
)
shouldBe
true
script
.
pedSamples
.
map
(
_
.
individualId
).
contains
(
"sample2"
)
shouldBe
true
script
.
pedSamples
.
map
(
_
.
individualId
).
contains
(
"sample3"
)
shouldBe
true
script
.
pedSamples
.
map
(
_
.
individualId
).
contains
(
"sample4"
)
shouldBe
true
script
.
pedSamples
.
map
(
_
.
individualId
).
contains
(
"sample5"
)
shouldBe
true
script
.
pedSamples
.
map
(
_
.
individualId
).
contains
(
"sample6"
)
shouldBe
true
}
@Test
def
testWritePedFile
()
:
Unit
=
{
val
script
=
PedigreeQScriptTest
(
sample1
::
sample2
::
sample3
::
trioPed
::
Nil
)
script
.
init
()
script
.
biopetScript
()
val
tmpFile
=
File
.
createTempFile
(
"test"
,
".ped"
)
tmpFile
.
deleteOnExit
()
script
.
writeToPedFile
(
tmpFile
)
val
expectedLines
=
Source
.
fromFile
(
resourcePath
(
"/full.ped"
)).
getLines
().
toList
.
sorted
val
writtenLines
=
Source
.
fromFile
(
tmpFile
).
getLines
().
toList
.
sorted
writtenLines
shouldEqual
expectedLines
}
}
object
PedigreeQScriptTest
{
private
def
resourcePath
(
p
:
String
)
:
String
=
{
Paths
.
get
(
getClass
.
getResource
(
p
).
toURI
).
toString
}
val
sample1
=
Map
(
"samples"
->
Map
(
"sample1"
->
Map
(
"tags"
->
Map
(
"gender"
->
"female"
,
"father"
->
"sample2"
,
"mother"
->
"sample3"
,
"family"
->
"fam01"
))))
val
sample2
=
Map
(
"samples"
->
Map
(
"sample2"
->
Map
(
"tags"
->
Map
(
"gender"
->
"male"
,
"family"
->
"fam01"
))))
val
sample3
=
Map
(
"samples"
->
Map
(
"sample3"
->
Map
(
"tags"
->
Map
(
"gender"
->
"female"
,
"family"
->
"fam01"
))))
val
trioPed
=
Map
(
"ped_file"
->
resourcePath
(
"/trio.ped"
))
def
apply
(
configs
:
List
[
Map
[
String
,
Any
]],
only
:
List
[
String
]
=
Nil
)
=
{
new
QScript
with
PedigreeQscript
{
qscript
=>
qSettings
=
new
QSettings
()
qSettings
.
runName
=
"test"
override
val
onlySamples
=
only
var
buffer
=
new
ListBuffer
[
String
]()
override
def
globalConfig
=
new
Config
(
configs
.
foldLeft
(
Map
[
String
,
Any
]())
{
case
(
a
,
b
)
=>
ConfigUtils
.
mergeMaps
(
a
,
b
)
})
val
parent
=
null
def
getLastLogMessage
:
String
=
{
Logging
.
errors
.
toList
.
last
.
getMessage
}
class
Sample
(
id
:
String
)
extends
AbstractSample
(
id
)
{
class
Library
(
id
:
String
)
extends
AbstractLibrary
(
id
)
{
/** Function that add library jobs */
protected
def
addJobs
()
:
Unit
=
{
buffer
+=
config
(
"test"
)
}
/** Must return files to store into summary */
def
summaryFiles
:
Map
[
String
,
File
]
=
Map
()
/** Must returns stats to store into summary */
def
summaryStats
=
Map
()
}
/**
* Factory method for Library class
* @param id SampleId
* @return Sample class
*/
def
makeLibrary
(
id
:
String
)
:
Library
=
new
Library
(
id
)
/** Function to add sample jobs */
protected
def
addJobs
()
:
Unit
=
{
buffer
+=
s
"$sampleId"
addPerLibJobs
()
add
(
new
Md5sum
(
qscript
))
}
/** Must return files to store into summary */
def
summaryFiles
:
Map
[
String
,
File
]
=
Map
()
/** Must returns stats to store into summary */
def
summaryStats
=
Map
()
}
/**
* Method where the multisample jobs should be added, this will be executed only when running the -sample argument is not given.
*/
def
addMultiSampleJobs
()
:
Unit
=
{
add
(
new
Md5sum
(
qscript
))
}
/**
* Factory method for Sample class
* @param id SampleId
* @return Sample class
*/
def
makeSample
(
id
:
String
)
:
Sample
=
new
Sample
(
id
)
/** Must return a map with used settings for this pipeline */
def
summarySettings
:
Map
[
String
,
Any
]
=
Map
()
/** File to put in the summary for thie pipeline */
def
summaryFiles
:
Map
[
String
,
File
]
=
Map
()
/** Name of summary output file */
def
summaryFile
:
File
=
new
File
(
"./summary.json"
)
/** Init for pipeline */
def
init
()
:
Unit
=
{}
/** Pipeline itself */
def
biopetScript
()
:
Unit
=
{
addSamplesJobs
()
addSummaryJobs
()
}
}
}
}
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsMerge.scala
View file @
155cf263
...
...
@@ -27,15 +27,23 @@ class BedtoolsMerge(val parent: Configurable) extends Bedtools {
@Input
(
doc
=
"Input bed file"
)
var
input
:
File
=
_
@Argument
(
doc
=
"Distance"
)
var
dist
:
Option
[
Int
]
=
config
(
"dist"
)
//default of tool is 1
@Argument
(
doc
=
"Distance"
,
required
=
false
)
var
dist
:
Option
[
Int
]
=
config
(
"dist"
,
default
=
1
)
//default of tool is 1
@Output
(
doc
=
"Output bed file"
)
var
output
:
File
=
_
@Argument
(
doc
=
"operation to additional columns"
,
required
=
false
)
var
operation
:
Option
[
String
]
=
None