Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
B
biopet.biopet
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Jobs
Commits
Open sidebar
Mirrors
biopet.biopet
Commits
6b740635
Commit
6b740635
authored
Apr 21, 2017
by
Sander Bollen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add zscore jobs
parent
fa86f834
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
173 additions
and
10 deletions
+173
-10
tarmac/src/main/resources/nl/lumc/sasc/biopet/pipelines/scripts/select_sample_from_matrix.py
...asc/biopet/pipelines/scripts/select_sample_from_matrix.py
+55
-0
tarmac/src/main/scala/nl/lumc/sasc/biopet/pipelines/tarmac/Tarmac.scala
...n/scala/nl/lumc/sasc/biopet/pipelines/tarmac/Tarmac.scala
+88
-10
tarmac/src/main/scala/nl/lumc/sasc/biopet/pipelines/tarmac/scripts/SampleFromMatrix.scala
...sc/biopet/pipelines/tarmac/scripts/SampleFromMatrix.scala
+30
-0
No files found.
tarmac/src/main/resources/nl/lumc/sasc/biopet/pipelines/scripts/select_sample_from_matrix.py
0 → 100644
View file @
6b740635
#!/usr/bin/env python3
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster
# which is running SGE. But other types of HPC that are supported by
# GATK Queue (such as PBS) should also be able to execute Biopet tools and
# pipelines.
#
# Copyright 2014-2017 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project is
# freely available for non-commercial use under an AGPL license;
# For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
import
argparse
def
xhmm_region_to_bed
(
region
):
"""Convert xhmm-style region to bed-style region."""
chromosome
,
interval
=
region
.
split
(
':'
)
start
,
end
=
interval
.
split
(
'-'
)
return
"{0}
\t
{1}
\t
{2}"
.
format
(
chromosome
,
start
,
end
)
if
__name__
==
"__main__"
:
desc
=
"""
Extract a sample from an XHMM-style matrix.
Will print (to stdout) a four-column bed file,
where the fourth column is the data field.
"""
parser
=
argparse
.
ArgumentParser
(
description
=
desc
)
parser
.
add_argument
(
'-I'
,
'--input'
,
required
=
True
,
type
=
str
,
help
=
'Path to input matrix'
)
parser
.
add_argument
(
'-s'
,
'--sample'
,
required
=
True
,
type
=
str
,
help
=
'Sample name to be extracted'
)
args
=
parser
.
parse_args
()
values
=
None
with
open
(
args
.
input
)
as
handle
:
header
=
next
(
handle
).
strip
().
split
(
'
\t
'
)[
1
:]
regions
=
[
xhmm_region_to_bed
(
x
)
for
x
in
header
]
for
line
in
handle
:
if
line
.
startswith
(
args
.
sample
):
values
=
line
.
strip
().
split
(
'
\t
'
)[
1
:]
break
if
values
is
not
None
:
for
reg
,
val
in
zip
(
regions
,
values
):
print
(
reg
+
'
\t
'
+
val
)
else
:
raise
ValueError
(
'sample {0} does exist'
.
format
(
args
.
sample
))
tarmac/src/main/scala/nl/lumc/sasc/biopet/pipelines/tarmac/Tarmac.scala
View file @
6b740635
...
...
@@ -7,8 +7,9 @@ import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import
nl.lumc.sasc.biopet.extensions.bedtools.BedtoolsSort
import
nl.lumc.sasc.biopet.extensions.
{
Bgzip
,
Gzip
,
Ln
,
Tabix
}
import
nl.lumc.sasc.biopet.extensions.gatk.DepthOfCoverage
import
nl.lumc.sasc.biopet.extensions.wisecondor.
{
WisecondorCount
,
WisecondorGcCorrect
,
WisecondorNewRef
}
import
nl.lumc.sasc.biopet.extensions.xhmm.XhmmMergeGatkDepths
import
nl.lumc.sasc.biopet.extensions.wisecondor.
{
WisecondorCount
,
WisecondorGcCorrect
,
WisecondorNewRef
,
WisecondorZscore
}
import
nl.lumc.sasc.biopet.extensions.xhmm.
{
XhmmMatrix
,
XhmmMergeGatkDepths
,
XhmmNormalize
,
XhmmPca
}
import
nl.lumc.sasc.biopet.pipelines.tarmac.scripts.SampleFromMatrix
import
nl.lumc.sasc.biopet.utils.Logging
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.queue.QScript
...
...
@@ -68,16 +69,29 @@ class Tarmac(val parent: Configurable) extends QScript with PedigreeQscript with
val
wisecondorRefJobs
=
refMap
map
{
case
(
sample
,
refSamples
)
=>
val
refDir
=
new
File
(
new
File
(
outputDir
,
sample
.
sampleId
),
"wisecondor_reference"
)
createWisecondorReferenceJobs
(
refSamples
,
refDir
)
sample
->
createWisecondorReferenceJobs
(
refSamples
,
refDir
)
}
val
xhmmRefJobs
=
refMap
map
{
case
(
sample
,
refSamples
)
=>
val
refDir
=
new
File
(
new
File
(
outputDir
,
sample
.
sampleId
),
"xhmm_reference"
)
createXhmmReferenceJobs
(
sample
,
refSamples
,
refDir
)
sample
->
createXhmmReferenceJobs
(
sample
,
refSamples
,
refDir
)
}
(
wisecondorRefJobs
.
toList
:::
xhmmRefJobs
.
toList
).
flatten
.
foreach
(
job
=>
add
(
job
))
val
wisecondorZJobs
=
wisecondorRefJobs
map
{
case
(
sample
,
jobsAndRefFile
)
=>
val
refJobs
=
jobsAndRefFile
.
_1
val
refFile
=
jobsAndRefFile
.
_2
val
tbiFile
=
refJobs
.
collect
{
case
tbi
:
Tabix
=>
tbi
.
outputIndex
}.
head
sample
->
createWisecondorZScore
(
sample
,
refFile
,
tbiFile
)
}
val
xhmmZJobs
=
xhmmRefJobs
map
{
case
(
sample
,
jobsAndRefFile
)
=>
sample
->
createXhmmZscore
(
sample
,
jobsAndRefFile
.
_2
)
}
addAll
(
wisecondorZJobs
.
values
.
toList
:::
(
xhmmZJobs
.
values
.
map
(
_
.
_1
).
toList
:::
wisecondorRefJobs
.
values
.
map
(
_
.
_1
).
toList
:::
xhmmRefJobs
.
values
.
map
(
_
.
_1
).
toList
).
flatten
)
}
/**
...
...
@@ -105,16 +119,25 @@ class Tarmac(val parent: Configurable) extends QScript with PedigreeQscript with
}
}
def
createXhmmReferenceJobs
(
sample
:
Sample
,
referenceSamples
:
Set
[
Sample
],
outputDirectory
:
File
)
:
List
[
QFunction
]
=
{
/*
Create jobs for Xhmm reference creation
Returns both jobs and the resulting reference file
*/
def
createXhmmReferenceJobs
(
sample
:
Sample
,
referenceSamples
:
Set
[
Sample
],
outputDirectory
:
File
)
:
Tuple2
[
List
[
QFunction
]
,
File
]
=
{
/* XHMM requires refset including self */
val
totalSet
=
referenceSamples
+
sample
val
merger
=
new
XhmmMergeGatkDepths
(
this
)
merger
.
gatkDepthsFiles
=
totalSet
.
map
(
_
.
outputXhmmCountFile
).
collect
{
case
\/-(
file
)
=>
file
}.
toList
merger
.
output
=
new
File
(
outputDirectory
,
"reference.matrix"
)
List
(
merger
)
val
refFile
=
new
File
(
outputDirectory
,
"reference.matrix"
)
merger
.
output
=
refFile
(
List
(
merger
),
refFile
)
}
def
createWisecondorReferenceJobs
(
referenceSamples
:
Set
[
Sample
],
outputDirectory
:
File
)
:
List
[
QFunction
]
=
{
/*
Create jobs for wisecondor reference creation
Returns both jobs and the resulting reference file
*/
def
createWisecondorReferenceJobs
(
referenceSamples
:
Set
[
Sample
],
outputDirectory
:
File
)
:
Tuple2
[
List
[
QFunction
]
,
File
]
=
{
val
gccs
=
referenceSamples
.
map
(
_
.
outputWisecondorGccFile
).
collect
{
case
\/-(
file
)
=>
file
}.
toList
val
reference
=
new
WisecondorNewRef
(
this
)
reference
.
inputBeds
=
gccs
...
...
@@ -129,7 +152,62 @@ class Tarmac(val parent: Configurable) extends QScript with PedigreeQscript with
gzipRef
.
input
=
List
(
sort
.
output
)
gzipRef
.
output
=
refFile
val
tabix
=
Tabix
(
this
,
refFile
)
reference
::
sort
::
gzipRef
::
tabix
::
Nil
(
reference
::
sort
::
gzipRef
::
tabix
::
Nil
,
refFile
)
}
def
createWisecondorZScore
(
sample
:
Sample
,
referenceFile
:
File
,
tbiFile
:
File
)
:
WisecondorZscore
=
{
val
zscore
=
new
WisecondorZscore
(
this
)
val
zFile
=
new
File
(
sample
.
sampleDir
,
s
"${sample.sampleId}.z.bed"
)
zscore
.
inputBed
=
sample
.
outputWisecondorGzFile
.
getOrElse
(
new
File
(
""
))
zscore
.
deps
++=
sample
.
outputWisecondorTbiFile
.
toList
zscore
.
deps
:+=
tbiFile
zscore
.
referenceDictionary
=
referenceFile
zscore
.
output
=
zFile
zscore
}
def
createXhmmZscore
(
sample
:
Sample
,
referenceMatrix
:
File
)
:
Tuple2
[
List
[
QFunction
]
,
File
]
=
{
// the filtered and centered matrix
val
filtMatrix
=
new
XhmmMatrix
(
this
)
filtMatrix
.
inputMatrix
=
referenceMatrix
filtMatrix
.
outputMatrix
=
new
File
(
sample
.
sampleDir
,
s
"${sample.sampleId}.filtered-centered.matrix"
)
filtMatrix
.
outputExcludedSamples
=
Some
(
new
File
(
sample
.
sampleDir
,
s
"${sample.sampleId}.filtered-samples.txt"
))
filtMatrix
.
outputExcludedTargets
=
Some
(
new
File
(
sample
.
sampleDir
,
s
"${sample.sampleId}.filtered-targets.txt"
))
add
(
filtMatrix
)
// pca generation
val
pca
=
new
XhmmPca
(
this
)
pca
.
inputMatrix
=
filtMatrix
.
outputMatrix
pca
.
pcaFile
=
new
File
(
sample
.
sampleDir
,
s
"${sample.sampleId}.pca.matrix"
)
add
(
pca
)
// normalization
val
normalize
=
new
XhmmNormalize
(
this
)
normalize
.
inputMatrix
=
filtMatrix
.
outputMatrix
normalize
.
pcaFile
=
pca
.
pcaFile
normalize
.
normalizeOutput
=
new
File
(
sample
.
sampleDir
,
s
"${sample.sampleId}.normalized.matrix"
)
add
(
normalize
)
// create matrix of zscores
val
zMatrix
=
new
XhmmMatrix
(
this
)
zMatrix
.
inputMatrix
=
normalize
.
normalizeOutput
zMatrix
.
centerData
=
true
zMatrix
.
centerType
=
"sample"
zMatrix
.
zScoreData
=
true
zMatrix
.
outputExcludedTargets
=
Some
(
new
File
(
sample
.
sampleDir
,
s
"${sample.sampleId}.z-filtered-targets.txt"
))
zMatrix
.
outputExcludedSamples
=
Some
(
new
File
(
sample
.
sampleDir
,
s
"${sample.sampleId}.z-filtered-samples.txt"
))
zMatrix
.
outputMatrix
=
new
File
(
sample
.
sampleDir
,
"zscores.matrix"
)
add
(
zMatrix
)
// select sample from matrix
val
selector
=
new
SampleFromMatrix
(
this
)
selector
.
sample
=
sample
.
sampleId
selector
.
inputMatrix
=
zMatrix
.
outputMatrix
val
zscoreFile
=
new
File
(
sample
.
sampleDir
,
s
"${sample.sampleId}.zscore-xhmm.bed"
)
selector
.
output
=
Some
(
zscoreFile
)
(
selector
::
zMatrix
::
normalize
::
pca
::
filtMatrix
::
Nil
,
zscoreFile
)
}
class
Sample
(
name
:
String
)
extends
AbstractSample
(
name
)
{
...
...
tarmac/src/main/scala/nl/lumc/sasc/biopet/pipelines/tarmac/scripts/SampleFromMatrix.scala
0 → 100644
View file @
6b740635
package
nl.lumc.sasc.biopet.pipelines.tarmac.scripts
import
java.io.File
import
nl.lumc.sasc.biopet.core.extensions.PythonCommandLineFunction
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.utils.commandline.
{
Argument
,
Input
,
Output
}
/**
* Created by Sander Bollen on 21-4-17.
*/
class
SampleFromMatrix
(
val
parent
:
Configurable
)
extends
PythonCommandLineFunction
{
setPythonScript
(
"select_sample_from_matrix.py"
)
@Input
var
inputMatrix
:
File
=
_
@Argument
var
sample
:
String
=
_
@Output
(
required
=
false
)
var
output
:
Option
[
File
]
=
None
def
cmdLine
:
String
=
{
getPythonCommand
+
required
(
"-I"
,
inputMatrix
)
+
required
(
"-s"
,
sample
)
+
(
if
(
outputAsStsout
)
""
else
" > "
+
required
(
output
))
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment